← Back to Index

openshift install cnv with ocs and external ceph

本次测试的目标业务场景是,一个CS服务的虚机,用CNV跑在openshift上,虚机的镜像承载在ceph上,并测试虚机热迁移和虚机克隆场景。

由于测试环境所限,我们配置一个单节点的ceph,挂3个5.5T的盘,这个ceph节点,就用kvm,配置了16C32G,实际跑下来,感觉8C32G也是够的。

部署架构图

diag

视频讲解

单节点ceph安装,ocs安装,对接外部ceph存储

cnv安装,导入虚机镜像,热迁移,克隆

install ceph

我们先安装这个ceph节点。


        #####################################
        
        ## start to install ceph
        
        cd /backup/wzh
        
        lvremove -f ocp4/cephlv
        lvcreate -y -L 230G -n cephlv ocp4
        
        lvremove -f ocp4/cephdata01lv
        lvcreate -y -L 3T -n cephdata01lv ocp4
        
        lvremove -f ocp4/cephdata02lv
        lvcreate -y -L 3T -n cephdata02lv ocp4
        
        lvremove -f ocp4/cephdata03lv
        lvcreate -y -L 3T -n cephdata03lv ocp4
        
        virt-install --name=ocp4-ceph --vcpus=16 --ram=32768 \
        --disk path=/dev/ocp4/cephlv,device=disk,bus=virtio,format=raw \
        --disk path=/dev/ocp4/cephdata01lv,device=disk,bus=virtio,format=raw \
        --disk path=/dev/ocp4/cephdata02lv,device=disk,bus=virtio,format=raw \
        --disk path=/dev/ocp4/cephdata03lv,device=disk,bus=virtio,format=raw \
        --os-variant centos7.0 --network network:openshift4,model=virtio \
        --boot menu=on --location /home/data/openshift/ocp.4.3.21/rhel-server-7.8-x86_64-dvd.iso \
        --initrd-inject rhel-ks-ceph.cfg --extra-args "inst.ks=file:/rhel-ks-ceph.cfg" 
        
        #######################################
        
        #  kvm's host bond and vlan
        
        # https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/networking_guide/sec-configure_802_1q_vlan_tagging_using_the_command_line_tool_nmcli
        
        # https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/7/html/networking_guide/sec-vlan_on_bond_and_bridge_using_the_networkmanager_command_line_tool_nmcli
        
        nmcli con add type bond \
            con-name bond-24 \
            ifname bond-24 \
            mode 802.3ad ipv4.method disabled ipv6.method ignore
            
        nmcli con mod id bond-24 bond.options \
            mode=802.3ad,miimon=100,lacp_rate=fast,xmit_hash_policy=layer2+3
            
        nmcli con add type bond-slave ifname enp176s0f0 con-name enp176s0f0 master bond-24
        nmcli con add type bond-slave ifname enp59s0f0 con-name enp59s0f0 master bond-24
        
        nmcli con up bond-24
        
        nmcli connection add type bridge con-name br-ceph ifname br-ceph ip4 192.168.18.200/24
        
        nmcli con up br-ceph
        
        nmcli con add type vlan con-name vlan-ceph ifname vlan-ceph dev bond-24 id 501 master br-ceph slave-type bridge
        
        nmcli con up vlan-ceph
        
        # no need below
        
        # cat << EOF >  /backup/wzh/virt-net.xml
        
        # <network>
        
        #   <name>vm-br-ceph</name>
        
        #   <forward mode='bridge'>
        
        #     <bridge name='br-ceph'/>
        
        #   </forward>
        
        # </network>
        
        # EOF
        
        # virsh net-define --file virt-net.xml
        
        # virsh net-autostart br-ceph
        
        # virsh net-start br-ceph
        
        # virsh net-list
        
        # # restore
        
        # virsh net-undefine br-ceph
        
        # virsh net-destroy br-ceph
        
        cat << EOF > /root/.ssh/config
        StrictHostKeyChecking no
        UserKnownHostsFile=/dev/null
        
        EOF
        
        # restore
        
        nmcli con del vlan-ceph
        nmcli con del br-ceph
        nmcli con del enp59s0f0
        nmcli con del enp176s0f0
        nmcli con del bond-24
        
        ########################################
        
        # go to ceph vm
        
        # https://www.cyberciti.biz/faq/linux-list-network-cards-command/
        
        cat /proc/net/dev
        
        nmcli con add type ethernet ifname eth1 con-name eth1
        nmcli con modify eth1 ipv4.method manual ipv4.addresses 192.168.18.203/24
        nmcli con modify eth1 connection.autoconnect yes
        nmcli con reload
        nmcli con up eth1
        
        # restore
        
        nmcli con del eth1
        
        ##########################################
        
        # go to worker2 vm, to test the ceph vlan
        
        nmcli con add type ethernet ifname ens9 con-name ens9
        nmcli con modify ens9 ipv4.method manual ipv4.addresses 192.168.18.209/24
        nmcli con modify ens9 connection.autoconnect yes
        nmcli con reload
        nmcli con up ens9
        
        # restore
        
        nmcli con del ens9
        nmcli con del 'Wired connection 1'
        
        ##########################################
        
        # go to worker1 vm, to test the ceph vlan
        
        nmcli con add type ethernet ifname ens9 con-name ens9
        nmcli con modify ens9 ipv4.method manual ipv4.addresses 192.168.18.208/24
        nmcli con modify ens9 connection.autoconnect yes
        nmcli con reload
        nmcli con up ens9
        
        # restore
        
        nmcli con del ens9
        nmcli con del 'Wired connection 1'
        
        ##########################################
        
        # go to worker0 vm, to test the ceph vlan
        
        nmcli con add type ethernet ifname ens9 con-name ens9
        nmcli con modify ens9 ipv4.method manual ipv4.addresses 192.168.18.207/24
        nmcli con modify ens9 connection.autoconnect yes
        nmcli con reload
        nmcli con up ens9
        
        ##########################################
        
        # go to master2 vm, to test the ceph vlan
        
        nmcli con add type ethernet ifname ens9 con-name ens9
        nmcli con modify ens9 ipv4.method manual ipv4.addresses 192.168.18.206/24
        nmcli con modify ens9 connection.autoconnect yes
        nmcli con reload
        nmcli con up ens9
        
        # restore
        
        nmcli con del ens9
        nmcli con del 'Wired connection 1'
        
        ##########################################
        
        # go to master1 vm, to test the ceph vlan
        
        nmcli con add type ethernet ifname ens9 con-name ens9
        nmcli con modify ens9 ipv4.method manual ipv4.addresses 192.168.18.205/24
        nmcli con modify ens9 connection.autoconnect yes
        nmcli con reload
        nmcli con up ens9
        
        # restore
        
        nmcli con del ens9
        nmcli con del 'Wired connection 1'
        
        ##########################################
        
        # go to master0 vm, to test the ceph vlan
        
        nmcli con add type ethernet ifname ens9 con-name ens9
        nmcli con modify ens9 ipv4.method manual ipv4.addresses 192.168.18.204/24
        nmcli con modify ens9 connection.autoconnect yes
        nmcli con reload
        nmcli con up ens9
        
        # restore
        
        nmcli con del ens9
        nmcli con del 'Wired connection 1'
        
        ##########################################
        
        # go to worker4 baremetal, to test the ceph vlan
        
        nmcli con del 'Wired connection 1'
        nmcli con del 'Wired connection 2'
        nmcli con del 'Wired connection 3'
        nmcli con del 'Wired connection 4'
        nmcli con del 'Wired connection 5'
        nmcli con del ens35f0.991
        nmcli con del ens35f1
        
        # https://access.redhat.com/solutions/1526613
        
        nmcli con add type bond \
            con-name bond-24 \
            ifname bond-24 \
            mode 802.3ad ipv4.method disabled ipv6.method ignore
            
        nmcli con mod id bond-24 bond.options \
            mode=802.3ad,miimon=100,lacp_rate=fast,xmit_hash_policy=layer2+3
            
        nmcli con add type bond-slave ifname ens49f0 con-name ens49f0 master bond-24
        nmcli con add type bond-slave ifname ens35f0 con-name ens35f0 master bond-24
        
        nmcli con up bond-24
        
        nmcli con add type vlan con-name vlan-ceph ifname vlan-ceph dev bond-24 id 501 ip4 192.168.18.211/24
        
        nmcli con up vlan-ceph
        
        # restore
        
        nmcli con del vlan-ceph
        nmcli con del ens49f0 ens35f0
        nmcli con del bond-24
        
        #############################################
        
        # go to worker3 baremetal, to test the ceph vlan
        
        nmcli con del 'Wired connection 1'
        nmcli con del 'Wired connection 2'
        nmcli con del 'Wired connection 3'
        nmcli con del 'Wired connection 4'
        nmcli con del 'Wired connection 5'
        
        nmcli con add type bond \
            con-name bond-24 \
            ifname bond-24 \
            mode 802.3ad ipv4.method disabled ipv6.method ignore
            
        nmcli con mod id bond-24 bond.options \
            mode=802.3ad,miimon=100,lacp_rate=fast,xmit_hash_policy=layer2+3
            
        nmcli con add type bond-slave ifname ens49f0 con-name ens49f0 master bond-24
        nmcli con add type bond-slave ifname ens35f0 con-name ens35f0 master bond-24
        
        nmcli con up bond-24
        
        nmcli con add type vlan con-name vlan-ceph ifname vlan-ceph dev bond-24 id 501  ip4 192.168.18.210/24
        
        nmcli con up vlan-ceph
        
        # restore
        
        nmcli con del vlan-ceph
        nmcli con del ens49f0 ens35f0
        nmcli con del bond-24
        
        
        #################################################
        
        ## for ceph vm
        
        # install a 'fast' http proxy, then
        
        subscription-manager --proxy=127.0.0.1:6666 register --username **** --password ********
        
        # subscription-manager --proxy=127.0.0.1:6666 refresh
        
        subscription-manager config --rhsm.baseurl=https://china.cdn.redhat.com
        
        # subscription-manager config --rhsm.baseurl=https://cdn.redhat.com
        
        subscription-manager --proxy=127.0.0.1:6666 refresh
        
        # https://access.redhat.com/documentation/en-us/red_hat_ceph_storage/4/html-single/installation_guide/index
        
        subscription-manager --proxy=127.0.0.1:6666 repos --disable=*
        subscription-manager --proxy=127.0.0.1:6666 repos --enable=rhel-7-server-rpms \
        --enable=rhel-7-server-extras-rpms \
        --enable=rhel-7-server-supplementary-rpms \
        --enable=rhel-7-server-optional-rpms \
        --enable=rhel-7-server-rhceph-4-tools-rpms --enable=rhel-7-server-ansible-2.8-rpms \
        --enable=rhel-7-server-rhceph-4-mon-rpms \
        --enable=rhel-7-server-rhceph-4-osd-rpms \
        --enable=rhel-7-server-rhceph-4-tools-rpms 
        
        
        yum clean all
        yum makecache
        
        yum update -y
        
        systemctl enable --now firewalld
        systemctl start firewalld
        systemctl status firewalld
        
        firewall-cmd --zone=public --add-port=6789/tcp
        firewall-cmd --zone=public --add-port=6789/tcp --permanent
        firewall-cmd --zone=public --add-port=6800-7300/tcp
        firewall-cmd --zone=public --add-port=6800-7300/tcp --permanent
        firewall-cmd --zone=public --add-port=6800-7300/tcp
        firewall-cmd --zone=public --add-port=6800-7300/tcp --permanent
        firewall-cmd --zone=public --add-port=6800-7300/tcp
        firewall-cmd --zone=public --add-port=6800-7300/tcp --permanent
        firewall-cmd --zone=public --add-port=8080/tcp
        firewall-cmd --zone=public --add-port=8080/tcp --permanent
        firewall-cmd --zone=public --add-port=443/tcp
        firewall-cmd --zone=public --add-port=443/tcp --permanent
        
        # firewall-cmd --zone=public --add-port=9090/tcp
        
        # firewall-cmd --zone=public --add-port=9090/tcp --permanent
        
        ssh-keygen
        
        sed -i 's/#UseDNS yes/UseDNS no/' /etc/ssh/sshd_config
        systemctl restart sshd
        
        ssh-copy-id root@ceph
        
        yum install -y ceph-ansible docker
        
        cd /usr/share/ceph-ansible
        
        # yum install -y docker
        
        systemctl enable --now docker
        
        cd /usr/share/ceph-ansible
        /bin/cp -f  group_vars/all.yml.sample group_vars/all.yml
        /bin/cp -f  group_vars/osds.yml.sample group_vars/osds.yml
        /bin/cp -f  site-docker.yml.sample site-docker.yml
        /bin/cp -f  site.yml.sample site.yml
        /bin/cp -f  group_vars/rgws.yml.sample group_vars/rgws.yml
        /bin/cp -f  group_vars/mdss.yml.sample group_vars/mdss.yml
        
        # remember to set the env
        
        # https://access.redhat.com/RegistryAuthentication
        
        # REGISTRY_USER_NAME=
        
        # REGISTRY_TOKEN=
        
        cat << EOF > ./group_vars/all.yml
        fetch_directory: ~/ceph-ansible-keys
        monitor_interface: eth1 
        public_network: 192.168.18.0/24
        
        # ceph_docker_image: rhceph/rhceph-4-rhel8
        
        # ceph_docker_image_tag: "latest"
        
        # containerized_deployment: true
        
        ceph_docker_registry: registry.redhat.io
        ceph_docker_registry_auth: true
        ceph_docker_registry_username: ${REGISTRY_USER_NAME}
        ceph_docker_registry_password: ${REGISTRY_TOKEN}
        ceph_origin: repository
        ceph_repository: rhcs
        
        # ceph_repository_type: cdn
        
        ceph_repository_type: iso
        ceph_rhcs_iso_path: /root/rhceph-4.1-rhel-7-x86_64.iso
        ceph_rhcs_version: 4
        bootstrap_dirs_owner: "167"
        bootstrap_dirs_group: "167"
        dashboard_admin_user: admin
        dashboard_admin_password: Redhat!23
        node_exporter_container_image: registry.redhat.io/openshift4/ose-prometheus-node-exporter:v4.1
        grafana_admin_user: admin
        grafana_admin_password: Redhat!23
        grafana_container_image: registry.redhat.io/rhceph/rhceph-4-dashboard-rhel8
        prometheus_container_image: registry.redhat.io/openshift4/ose-prometheus:4.1
        alertmanager_container_image: registry.redhat.io/openshift4/ose-prometheus-alertmanager:4.1
        radosgw_interface: eth1
        radosgw_address_block: 192.168.18.0/24
        radosgw_civetweb_port: 8080
        radosgw_civetweb_num_threads: 512
        ceph_conf_overrides:
          global:
            osd_pool_default_size: 3
            osd_pool_default_min_size: 2
            osd_pool_default_pg_num: 32
            osd_pool_default_pgp_num: 32
          osd:
           osd_scrub_begin_hour: 22
           osd_scrub_end_hour: 7
        
        EOF
        
        cat << EOF > ./group_vars/osds.yml
        devices:
          - /dev/vdb
          - /dev/vdc
          - /dev/vdd
        EOF
        
        cat << EOF > ./hosts
        [grafana-server]
        ceph
        [mons]
        ceph
        [osds]
        ceph
        [mgrs]
        ceph
        
        EOF
        
        sed -i "s/#copy_admin_key: false/copy_admin_key: true/" ./group_vars/rgws.yml
        
        cd /usr/share/ceph-ansible
        
        mkdir -p ~/ceph-ansible-keys
        ansible all -m ping -i hosts
        
        ansible-playbook -vv site.yml -i hosts
        
        #  You can access your dashboard web UI at http://ceph:8443/ as an 'admin' user with 'Redhat!23' password
        
        cd /root
        ceph osd getcrushmap -o crushmap
        crushtool -d crushmap -o crushmap.txt
        sed -i 's/step chooseleaf firstn 0 type host/step chooseleaf firstn 0 type osd/' crushmap.txt
        grep 'step chooseleaf' crushmap.txt
        crushtool -c crushmap.txt -o crushmap-new
        ceph osd setcrushmap -i crushmap-new
        cd /usr/share/ceph-ansible
        
        # test the result
        
        ceph health detail
        ceph osd pool create test 8
        ceph osd pool set test pg_num 128
        ceph osd pool set test pgp_num 128
        ceph osd pool application enable test rbd
        ceph -s
        ceph osd tree
        ceph osd pool ls
        ceph pg dump
        cat << EOF > hello-world.txt
        wangzheng
        EOF
        rados --pool test put hello-world hello-world.txt
        rados --pool test get hello-world fetch.txt
        cat fetch.txt
        
        # continue to install
        
        cat << EOF >> ./hosts
        [rgws]
        ceph
        [mdss]
        ceph
        
        EOF
        
        ansible-playbook -vv site.yml --limit mdss -i hosts
        
        ansible-playbook -vv site.yml --limit rgws -i hosts
        
        # change mon param for S3
        
        # 416 (InvalidRange)
        
        # https://www.cnblogs.com/flytor/p/11380026.html
        
        # https://www.cnblogs.com/fuhai0815/p/12144214.html
        
        # https://access.redhat.com/solutions/3328431
        
        # add config line
        
        vi /etc/ceph/ceph.conf
        
        # mon_max_pg_per_osd = 300
        
        systemctl restart ceph-mon@ceph.service
        
        ceph tell mon.* injectargs '--mon_max_pg_per_osd=1000'
        
        ceph --admin-daemon /var/run/ceph/ceph-mon.`hostname -s`.asok config show | grep mon_max_pg_per_osd
        
        ceph --admin-daemon /var/run/ceph/ceph-mgr.`hostname -s`.asok config set mon_max_pg_per_osd 1000
        
        ceph osd lspools
        ceph osd dump | grep 'replicated size'

install ocs

接下来在openshift4里面安装ocs组件,来对接之前安装的ceph节点。


        # check ceph versino
        
        ceph tell osd.* version
        
        python ceph-external-cluster-details-exporter.py --help
        
        python ceph-external-cluster-details-exporter.py --rbd-data-pool-name test --rgw-endpoint 192.168.18.203:8080 --run-as-user client.ocs
        
        # [{"kind": "ConfigMap", "data": {"maxMonId": "0", "data": "ceph=192.168.18.203:6789", "mapping": "{}"}, "name": "rook-ceph-mon-endpoints"}, {"kind": "Secret", "data": {"mon-secret": "mon-secret", "fsid": "bfaeb4fb-2f44-41e7-9539-1ca75bb394a8", "cluster-name": "openshift-storage", "admin-secret": "admin-secret"}, "name": "rook-ceph-mon"}, {"kind": "Secret", "data": {"userKey": "AQBZUWdfavnEDBAA0qwn1WLRbFV+0bUY+8ZnMQ==", "userID": "client.ocs"}, "name": "rook-ceph-operator-creds"}, {"kind": "Secret", "data": {"userKey": "AQBZUWdfC1EzDhAAjVV7+S3jKk8LcPUxxkIF9A==", "userID": "csi-rbd-node"}, "name": "rook-csi-rbd-node"}, {"kind": "StorageClass", "data": {"pool": "test"}, "name": "ceph-rbd"}, {"kind": "Secret", "data": {"userKey": "AQBZUWdfG8pvEBAAnldlqNj72gqBRvSxc8FB+g==", "userID": "csi-rbd-provisioner"}, "name": "rook-csi-rbd-provisioner"}, {"kind": "Secret", "data": {"adminID": "csi-cephfs-provisioner", "adminKey": "AQBZUWdfCxXWExAAiiaU1KIyjFsBxZB6h9WVtw=="}, "name": "rook-csi-cephfs-provisioner"}, {"kind": "Secret", "data": {"adminID": "csi-cephfs-node", "adminKey": "AQBZUWdf52L9ERAAXbK5upV2lO5phttDrwzJyg=="}, "name": "rook-csi-cephfs-node"}, {"kind": "StorageClass", "data": {"pool": "cephfs_data", "fsName": "cephfs"}, "name": "cephfs"}, {"kind": "StorageClass", "data": {"endpoint": "192.168.18.203:8080", "poolPrefix": "default"}, "name": "ceph-rgw"}]
        
        oc get cephcluster -n openshift-storage
        
        oc get storagecluster -n openshift-storage
        
        # install chrome on kvm host
        
        wget https://dl.google.com/linux/direct/google-chrome-stable_current_x86_64.rpm
        yum install ./google-chrome-stable_current_*.rpm
        google-chrome &

install cnv


        # upload win10.qcow2 to http server(helper)
        
        scp win10.qcow2.gz root@192.168.8.202:/var/www/html/
        
        # on helper
        
        chmod 644 /var/www/html/win10.qcow2.gz
        
        oc project demo
        cat << EOF > win10.dv.yaml
        apiVersion: cdi.kubevirt.io/v1alpha1
        kind: DataVolume
        metadata:
          name: "example-import-dv-win10"
        spec:
          source:
              http:
                 url: "http://192.168.8.202:8080/win10.qcow2.gz" 
          pvc:
            volumeMode: Block
            storageClassName: ocs-external-storagecluster-ceph-rbd
            accessModes:
              - ReadWriteMany
            resources:
              requests:
                storage: "40Gi"
        EOF
        oc apply -n demo -f win10.dv.yaml
        
        oc get dv,pvc
        
        # create a vm, and test the live migration
        
        ###############################################################
        
        # network
        
        #####################################
        
        # worker4 baremetal, nic bond + vlan + bridge for business
        
        nmcli con add type bond \
            con-name bond-13 \
            ifname bond-13 \
            mode 802.3ad ipv4.method disabled ipv6.method ignore
            
        nmcli con mod id bond-13 bond.options \
            mode=802.3ad,miimon=100,lacp_rate=fast,xmit_hash_policy=layer2+3
            
        nmcli con add type bond-slave ifname ens49f1 con-name ens49f1 master bond-13
        nmcli con add type bond-slave ifname ens35f1 con-name ens35f1 master bond-13
        
        nmcli con up bond-13
        
        nmcli connection add type bridge con-name br-business ifname br-business ip4 172.17.4.211/24
        
        nmcli con up br-business
        
        nmcli con add type vlan con-name vlan-business ifname vlan-business dev bond-13 id 991 master br-business slave-type bridge
        
        nmcli con up vlan-business
        
        #####################################
        
        # worker4 baremetal, nic bond + vlan + bridge for business
        
        nmcli con add type bond \
            con-name bond-13 \
            ifname bond-13 \
            mode 802.3ad ipv4.method disabled ipv6.method ignore
            
        nmcli con mod id bond-13 bond.options \
            mode=802.3ad,miimon=100,lacp_rate=fast,xmit_hash_policy=layer2+3
            
        nmcli con add type bond-slave ifname ens49f1 con-name ens49f1 master bond-13
        nmcli con add type bond-slave ifname ens35f1 con-name ens35f1 master bond-13
        
        nmcli con up bond-13
        
        nmcli connection add type bridge con-name br-business ifname br-business ip4 172.17.4.210/24
        
        nmcli con up br-business
        
        nmcli con add type vlan con-name vlan-business ifname vlan-business dev bond-13 id 991 master br-business slave-type bridge
        
        nmcli con up vlan-business
        
        ###############################
        
        # try to add 2nd nic
        
        cat << EOF > nic.vm.yaml
        apiVersion: "k8s.cni.cncf.io/v1"
        kind: NetworkAttachmentDefinition
        metadata:
          name: bridge-network-business
          annotations:
            k8s.v1.cni.cncf.io/resourceName: bridge.network.kubevirt.io/br-business 
        spec:
          config: '{
            "cniVersion": "0.3.1",
            "name": "bridge-network-business", 
            "plugins": [
              {
                "type": "cnv-bridge", 
                "bridge": "br-business" 
              },
              {
                "type": "cnv-tuning" 
              }
            ]
          }'
        EOF

CS 游戏业务场景测试


        ###################################
        
        # add management vlan to kvm host
        
        nmcli con add type vlan con-name vlan-management ifname vlan-management dev bond-24 id 500  ip4 1.41.0.124/27
        
        nmcli con up vlan-management
        
        #restore
        nmcli con del vlan-management
        
        # upload cs server image
        
        # for python3
        
        python -m http.server 7800
        
        # for python2
        
        python -m SimpleHTTPServer 7800
        
        oc project demo
        cat << EOF > cnv.cs.dv.yaml
        apiVersion: cdi.kubevirt.io/v1alpha1
        kind: DataVolume
        metadata:
          name: "import-dv-cs-yitu"
        spec:
          source:
              http:
                 url: "http://192.168.8.251:7800/yitu.raw" 
          pvc:
            volumeMode: Block
            storageClassName: ocs-external-storagecluster-ceph-rbd
            accessModes:
              - ReadWriteMany
            resources:
              requests:
                storage: "150Gi"
        EOF
        oc apply -n demo -f cnv.cs.dv.yaml
        
        oc get dv,pvc
        

业务测试服务器是一个CS业务,还是个ubuntu14,我们启动这个虚机,并且配置他的网络。 interface /etc/network/interfaces.d/eth0.cfg for cs server (ubuntu 14)


        # The primary network interface
        
        auto eth0
        iface eth0 inet static
            address 172.17.4.215
            netmask 255.255.255.0
            gateway 172.17.4.254
            dns-nameservers 114.114.114.114
ifdown eth0
        ifup eth0

cnv live migration


        # upload cs server image
        
        # for python3
        
        python -m http.server 7800
        
        # for python2
        
        python -m SimpleHTTPServer 7800
        
        oc project demo
        cat << EOF > cnv.cs.dv.yaml
        apiVersion: cdi.kubevirt.io/v1alpha1
        kind: DataVolume
        metadata:
          name: "import-dv-rhel-78"
        spec:
          source:
              http:
                 url: "http://192.168.8.251:7800/rhel7.8.img" 
          pvc:
            volumeMode: Block
            storageClassName: ocs-external-storagecluster-ceph-rbd
            accessModes:
              - ReadWriteMany
            resources:
              requests:
                storage: "10Gi"
        EOF
        oc apply -n demo -f cnv.cs.dv.yaml
        
        oc get dv,pvc
        
        ############################################
        
        # try to debug the vm stuck after node failure, but find out this is not working.
        
        # we try to decrease the pdb, but no use, vm still not move to another node. 
        
        oc get pdb -n demo
        
        # NAME                               MIN AVAILABLE   MAX UNAVAILABLE   ALLOWED DISRUPTIONS   AGE
        
        # kubevirt-disruption-budget-j5zlc   2               N/A               0                     12m
        
        # kubevirt-disruption-budget-qsk9j   2               N/A               0                     12m
        
        oc patch pdb kubevirt-disruption-budget-j5zlc -n demo --type=merge -p '{"spec":{"minAvailable":0}}'
        oc patch pdb kubevirt-disruption-budget-qsk9j -n demo --type=merge -p '{"spec":{"minAvailable":0}}'
        
        # Cannot evict pod as it would violate the pod's disruption budget.
        
        oc adm drain worker-3.ocp4.redhat.ren --grace-period=10 --force --delete-local-data --ignore-daemonsets
        
        oc adm uncordon worker-3.ocp4.redhat.ren

debug for node failure senario


        # evictionStrategy: LiveMigrate
        
        # power off and power on the VM 
        
        # remove evictionStrategy: LiveMigrate settings, 
        
        # and find out this doesn't work
        
        oc patch -n demo vm/rhel78 --type json  -p '[{"op": "remove", "path": "/spec/template/spec/evictionStrategy"}]'
        oc get vm/rhel78 -o yaml | grep evictionStrategy
        
        # restore evictionStrategy: LiveMigrate settings
        
        oc patch -n demo vm/rhel78 --type=merge -p '{"spec": {"template": {"spec": {"evictionStrategy":"LiveMigrate"} } } }'
        
        # oc delete pod -n openshift-storage noobaa-db-0 --force --grace-period=0
        
        # oc get pod -n openshift-storage
        
        # no result out for these 2 command
        
        oc get pod/virt-launcher-rhel78-r6d9m -o yaml | grep -A2 finalizer
        oc get vm/rhel78 -o yaml | grep -A2 finalizer
        
        # we can see there are finalizers on vmi
        
        oc get vmi/rhel78 -o yaml | grep -A2 finalizer
          # finalizers:
          # - foregroundDeleteVirtualMachine
          # generation: 20
        
        # poweroff the node, to reproduce the issue
        
        # when the node is notready, and pod is terminating
        
        oc get node
        
        # NAME                       STATUS     ROLES        AGE    VERSION
        
        # master-0.ocp4.redhat.ren   Ready      master       102d   v1.18.3+6c42de8
        
        # master-1.ocp4.redhat.ren   Ready      master       102d   v1.18.3+6c42de8
        
        # master-2.ocp4.redhat.ren   Ready      master       102d   v1.18.3+6c42de8
        
        # worker-0.ocp4.redhat.ren   Ready      worker       102d   v1.18.3+6c42de8
        
        # worker-1.ocp4.redhat.ren   Ready      worker       102d   v1.18.3+6c42de8
        
        # worker-2.ocp4.redhat.ren   Ready      worker       102d   v1.18.3+6c42de8
        
        # worker-3.ocp4.redhat.ren   NotReady   cnv,worker   93d    v1.18.3+6c42de8
        
        # worker-4.ocp4.redhat.ren   Ready      cnv,worker   91d    v1.18.3+6c42de8
        
        oc get pod
        
        # NAME                          READY   STATUS        RESTARTS   AGE
        
        # v2v-vmware-568b875554-lsj57   1/1     Running       0          2d6h
        
        # virt-launcher-rhel78-r6d9m    1/1     Terminating   0          44m
        
        oc get vmi
        
        # NAME     AGE   PHASE     IP               NODENAME
        
        # rhel78   52m   Running   172.17.4.15/24   worker-3.ocp4.redhat.ren
        
        # below is working
        
        oc patch -n demo vmi/rhel78 --type=merge -p '{"metadata": {"finalizers":null}}'
        
        # after node failure, delete vmi
        
        oc delete vmi/rhel78
        oc get pod
        
        # NAME                          READY   STATUS        RESTARTS   AGE
        
        # v2v-vmware-568b875554-lsj57   1/1     Running       0          2d6h
        
        # virt-launcher-rhel78-f5ltc    1/1     Running       0          32s
        
        # virt-launcher-rhel78-r6d9m    1/1     Terminating   0          46m
        
        # no use below, because we are bare mental.
        
        cat << EOF > healthcheck.yaml
        apiVersion: machine.openshift.io/v1beta1
        kind: MachineHealthCheck
        metadata:
          name: example 
          namespace: openshift-machine-api
        spec:
          selector:
            matchLabels:
              machine.openshift.io/cluster-api-machine-role: cnv
          unhealthyConditions:
          - type:    "Ready"
            timeout: "300s" 
            status: "False"
          - type:    "Ready"
            timeout: "300s" 
            status: "Unknown"
          maxUnhealthy: "80%" 
        EOF
        oc apply -f healthcheck.yaml
        oc get MachineHealthCheck -n openshift-machine-api
        
        # NAME      MAXUNHEALTHY   EXPECTEDMACHINES   CURRENTHEALTHY
        
        # example   80%

其他备忘

oc get nns worker-4.ocp4.redhat.ren -o yaml
apiVersion: nmstate.io/v1alpha1
        kind: NodeNetworkState
        metadata:
          creationTimestamp: "2020-09-16T03:15:51Z"
          generation: 1
          managedFields:
          - apiVersion: nmstate.io/v1alpha1
            fieldsType: FieldsV1
            fieldsV1:
              f:metadata:
                f:ownerReferences:
                  .: {}
                  k:{"uid":"135e4844-bf87-465a-8f6a-5fc1f85e5beb"}:
                    .: {}
                    f:apiVersion: {}
                    f:kind: {}
                    f:name: {}
                    f:uid: {}
              f:status:
                .: {}
                f:currentState:
                  .: {}
                  f:dns-resolver:
                    .: {}
                    f:config:
                      .: {}
                      f:search: {}
                      f:server: {}
                    f:running:
                      .: {}
                      f:search: {}
                      f:server: {}
                  f:interfaces: {}
                  f:route-rules:
                    .: {}
                    f:config: {}
                  f:routes:
                    .: {}
                    f:config: {}
                    f:running: {}
                f:lastSuccessfulUpdateTime: {}
            manager: kubernetes-nmstate
            operation: Update
            time: "2020-09-23T01:38:50Z"
          name: worker-4.ocp4.redhat.ren
          ownerReferences:
          - apiVersion: v1
            kind: Node
            name: worker-4.ocp4.redhat.ren
            uid: 135e4844-bf87-465a-8f6a-5fc1f85e5beb
          resourceVersion: "43763614"
          selfLink: /apis/nmstate.io/v1alpha1/nodenetworkstates/worker-4.ocp4.redhat.ren
          uid: 095a8223-d139-4add-9fcf-0e0435191f78
        status:
          currentState:
            dns-resolver:
              config:
                search: []
                server:
                - 192.168.8.202
              running:
                search: []
                server:
                - 192.168.8.202
            interfaces:
            - ipv4:
                dhcp: false
                enabled: false
              ipv6:
                autoconf: false
                dhcp: false
                enabled: false
              link-aggregation:
                mode: 802.3ad
                options:
                  ad_actor_system: "00:00:00:00:00:00"
                  lacp_rate: fast
                  miimon: "100"
                  xmit_hash_policy: layer2+3
                slaves:
                - ens49f1
                - ens35f1
              mac-address: B8:59:9F:EF:71:5D
              mtu: 1500
              name: bond-13
              state: up
              type: bond
            - ipv4:
                dhcp: false
                enabled: false
              ipv6:
                autoconf: false
                dhcp: false
                enabled: false
              link-aggregation:
                mode: 802.3ad
                options:
                  ad_actor_system: "00:00:00:00:00:00"
                  lacp_rate: fast
                  miimon: "100"
                  xmit_hash_policy: layer2+3
                slaves:
                - ens49f0
                - ens35f0
              mac-address: B8:59:9F:EF:71:5C
              mtu: 1500
              name: bond-24
              state: up
              type: bond
            - bridge:
                options:
                  group-forward-mask: 0
                  mac-ageing-time: 300
                  multicast-snooping: true
                  stp:
                    enabled: true
                    forward-delay: 15
                    hello-time: 2
                    max-age: 20
                    priority: 32768
                port:
                - name: vlan-business
                  stp-hairpin-mode: false
                  stp-path-cost: 100
                  stp-priority: 32
              ipv4:
                address:
                - ip: 172.17.4.211
                  prefix-length: 24
                dhcp: false
                enabled: true
              ipv6:
                address:
                - ip: fe80::1a6a:4414:8fec:940e
                  prefix-length: 64
                auto-dns: true
                auto-gateway: true
                auto-routes: true
                autoconf: true
                dhcp: true
                enabled: true
              mac-address: B8:59:9F:EF:71:5D
              mtu: 1500
              name: br-business
              state: up
              type: linux-bridge
            - ipv4:
                enabled: false
              ipv6:
                enabled: false
              mac-address: 1e:d4:cc:be:5e:49
              mtu: 1450
              name: br0
              state: down
              type: ovs-interface
            - ethernet:
                auto-negotiation: true
                duplex: full
                speed: 10000
                sr-iov:
                  total-vfs: 0
                  vfs: []
              ipv4:
                dhcp: false
                enabled: false
              ipv6:
                autoconf: false
                dhcp: false
                enabled: false
              mac-address: B8:59:9F:EF:71:5C
              mtu: 1500
              name: ens35f0
              state: up
              type: ethernet
            - ethernet:
                auto-negotiation: true
                duplex: full
                speed: 10000
                sr-iov:
                  total-vfs: 0
                  vfs: []
              ipv4:
                dhcp: false
                enabled: false
              ipv6:
                autoconf: false
                dhcp: false
                enabled: false
              mac-address: B8:59:9F:EF:71:5D
              mtu: 1500
              name: ens35f1
              state: up
              type: ethernet
            - ipv4:
                enabled: false
              ipv6:
                enabled: false
              mac-address: B4:96:91:67:2D:A4
              mtu: 1500
              name: ens47f0
              state: down
              type: ethernet
            - ethernet:
                auto-negotiation: true
                duplex: full
                speed: 1000
                sr-iov:
                  total-vfs: 0
                  vfs: []
              ipv4:
                address:
                - ip: 192.168.8.211
                  prefix-length: 24
                dhcp: false
                enabled: true
              ipv6:
                address:
                - ip: fe80::b696:91ff:fe67:2da5
                  prefix-length: 64
                autoconf: false
                dhcp: false
                enabled: true
              mac-address: B4:96:91:67:2D:A5
              mtu: 1500
              name: ens47f1
              state: up
              type: ethernet
            - ethernet:
                auto-negotiation: true
                duplex: full
                speed: 10000
                sr-iov:
                  total-vfs: 0
                  vfs: []
              ipv4:
                dhcp: false
                enabled: false
              ipv6:
                autoconf: false
                dhcp: false
                enabled: false
              mac-address: B8:59:9F:EF:71:5C
              mtu: 1500
              name: ens49f0
              state: up
              type: ethernet
            - ethernet:
                auto-negotiation: true
                duplex: full
                speed: 10000
                sr-iov:
                  total-vfs: 0
                  vfs: []
              ipv4:
                dhcp: false
                enabled: false
              ipv6:
                autoconf: false
                dhcp: false
                enabled: false
              mac-address: B8:59:9F:EF:71:5D
              mtu: 1500
              name: ens49f1
              state: up
              type: ethernet
            - ipv4:
                enabled: false
              ipv6:
                enabled: false
              mtu: 65536
              name: lo
              state: down
              type: unknown
            - ipv4:
                enabled: false
              ipv6:
                enabled: false
              mac-address: de:b2:ca:03:6b:fa
              mtu: 1450
              name: tun0
              state: down
              type: ovs-interface
            - ipv4:
                dhcp: false
                enabled: false
              ipv6:
                autoconf: false
                dhcp: false
                enabled: false
              mac-address: B8:59:9F:EF:71:5D
              mtu: 1500
              name: vlan-business
              state: up
              type: vlan
              vlan:
                base-iface: bond-13
                id: 991
            - ipv4:
                address:
                - ip: 192.168.18.211
                  prefix-length: 24
                dhcp: false
                enabled: true
              ipv6:
                address:
                - ip: fe80::e852:70de:e7be:8f04
                  prefix-length: 64
                auto-dns: true
                auto-gateway: true
                auto-routes: true
                autoconf: true
                dhcp: true
                enabled: true
              mac-address: B8:59:9F:EF:71:5C
              mtu: 1500
              name: vlan-ceph
              state: up
              type: vlan
              vlan:
                base-iface: bond-24
                id: 501
            - ipv4:
                enabled: false
              ipv6:
                enabled: false
              mac-address: C2:AE:59:84:C6:E0
              mtu: 65000
              name: vxlan_sys_4789
              state: down
              type: vxlan
              vxlan:
                base-iface: ""
                destination-port: 4789
                id: 0
                remote: ""
            route-rules:
              config: []
            routes:
              config:
              - destination: 0.0.0.0/0
                metric: -1
                next-hop-address: 192.168.8.1
                next-hop-interface: ens47f1
                table-id: 0
              running:
              - destination: 172.17.4.0/24
                metric: 425
                next-hop-address: ""
                next-hop-interface: br-business
                table-id: 254
              - destination: 0.0.0.0/0
                metric: 104
                next-hop-address: 192.168.8.1
                next-hop-interface: ens47f1
                table-id: 254
              - destination: 192.168.8.0/24
                metric: 104
                next-hop-address: ""
                next-hop-interface: ens47f1
                table-id: 254
              - destination: 192.168.18.0/24
                metric: 400
                next-hop-address: ""
                next-hop-interface: vlan-ceph
                table-id: 254
              - destination: fe80::/64
                metric: 425
                next-hop-address: ""
                next-hop-interface: br-business
                table-id: 254
              - destination: fe80::/64
                metric: 256
                next-hop-address: ""
                next-hop-interface: ens47f1
                table-id: 254
              - destination: fe80::/64
                metric: 400
                next-hop-address: ""
                next-hop-interface: vlan-ceph
                table-id: 254
              - destination: ff00::/8
                metric: 256
                next-hop-address: ""
                next-hop-interface: br-business
                table-id: 255
              - destination: ff00::/8
                metric: 256
                next-hop-address: ""
                next-hop-interface: ens47f1
                table-id: 255
              - destination: ff00::/8
                metric: 256
                next-hop-address: ""
                next-hop-interface: vlan-ceph
                table-id: 255
          lastSuccessfulUpdateTime: "2020-09-23T01:38:50Z"

next step

Multi-Queue

cases: