openshift 4.12 UPI with the agent-based installer, single node

OpenShift already has many installation methods, and now there is one more: the agent-based installer. Its biggest selling point is that it no longer needs a separate bootstrap node. That is great news. Before, when discussing an installation with customers, they never understood why Red Hat claimed to support a 3-node deployment but asked for 4 servers. You can hardly blame them: strictly speaking, Red Hat did not support a true 3-node deployment, precisely because of that bootstrap node. With the agent-based installer, a 3-node deployment finally means what everyone expects it to mean: 3 servers.

According to the official documentation, the bootstrap node can be dropped because the bootstrap-related services are folded onto one of the master nodes, and the assisted-installer flow is used to achieve a true 3-node installation. You can watch this happening during an install; see the sketch after the doc link below.

- https://docs.openshift.com/container-platform/4.12/installing/installing_with_agent_based_installer/preparing-to-install-with-agent-based-installer.html
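A minimal way to see this consolidation, assuming the rendezvous IP used later in this document (192.168.77.43): while the install runs, the rendezvous master hosts the assisted-service containers that a dedicated bootstrap node used to run, and they are visible with podman:

```bash
# during the installation phase only; these containers disappear once
# the node reboots into the real cluster
ssh core@192.168.77.43 "sudo podman ps"
```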

In this post, we use the agent-based installer to install a single-node OCP cluster.

on helper node

```bash
# switch to your install version
export BUILDNUMBER=4.12.9

pushd /data/ocp4/${BUILDNUMBER}
tar -xzf openshift-client-linux-${BUILDNUMBER}.tar.gz -C /usr/local/bin/
tar -xzf openshift-install-linux-${BUILDNUMBER}.tar.gz -C /usr/local/bin/
# tar -xzf oc-mirror.tar.gz -C /usr/local/bin/
# chmod +x /usr/local/bin/oc-mirror
install -m 755 /data/ocp4/clients/butane-amd64 /usr/local/bin/butane
install -m 755 /data/ocp4/clients/coreos-installer_amd64 /usr/local/bin/coreos-installer
popd

# create a user and create the cluster under the user
useradd -m 3node

su - 3node

ssh-keygen

cat << EOF > ~/.ssh/config
StrictHostKeyChecking no
UserKnownHostsFile=/dev/null
EOF
chmod 600 ~/.ssh/config

cat << 'EOF' >> ~/.bashrc
export BASE_DIR='/home/3node/'
EOF

# export BASE_DIR='/home/3node/'
export BUILDNUMBER=4.12.9

mkdir -p ${BASE_DIR}/data/{sno/disconnected,install}

# set some parameters of your cluster
NODE_SSH_KEY="$(cat ${BASE_DIR}/.ssh/id_rsa.pub)"
INSTALL_IMAGE_REGISTRY=quaylab.infra.wzhlab.top:5443

# PULL_SECRET='{"auths":{"registry.redhat.io": {"auth": "ZHVtbXk6ZHVtbXk=","email": "noemail@localhost"},"registry.ocp4.redhat.ren:5443": {"auth": "ZHVtbXk6ZHVtbXk=","email": "noemail@localhost"},"'${INSTALL_IMAGE_REGISTRY}'": {"auth": "'$( echo -n 'admin:shadowman' | openssl base64 )'","email": "noemail@localhost"}}}'
PULL_SECRET=$(cat /data/pull-secret.json)

NTP_SERVER=192.168.77.11
# HELP_SERVER=192.168.7.11
# KVM_HOST=192.168.7.11
# API_VIP=192.168.77.99
# INGRESS_VIP=192.168.77.98
# CLUSTER_PROVISION_IP=192.168.7.103
# BOOTSTRAP_IP=192.168.7.12

MACHINE_NETWORK='192.168.77.0/24'

# define the node information for the single node cluster
SNO_CLUSTER_NAME=osp-demo
SNO_BASE_DOMAIN=wzhlab.top

BOOTSTRAP_IP=192.168.77.42
MASTER_01_IP=192.168.77.43
MASTER_02_IP=192.168.77.44
MASTER_03_IP=192.168.77.45

BOOTSTRAP_IPv6=fd03::42
MASTER_01_IPv6=fd03::43
MASTER_02_IPv6=fd03::44
MASTER_03_IPv6=fd03::45

BOOTSTRAP_HOSTNAME=bootstrap-demo
MASTER_01_HOSTNAME=master-01-demo
MASTER_02_HOSTNAME=master-02-demo
MASTER_03_HOSTNAME=master-03-demo

BOOTSTRAP_INTERFACE=enp1s0
MASTER_01_INTERFACE=enp1s0
MASTER_02_INTERFACE=enp1s0
MASTER_03_INTERFACE=enp1s0

MASTER_01_INTERFACE_MAC=52:54:00:12:A1:01
MASTER_02_INTERFACE_MAC=52:54:00:12:A1:02
MASTER_03_INTERFACE_MAC=52:54:00:12:A1:03

BOOTSTRAP_DISK=/dev/vda
MASTER_01_DISK=/dev/vda
MASTER_02_DISK=/dev/vda
MASTER_03_DISK=/dev/vda

OCP_GW=192.168.77.11
OCP_NETMASK=255.255.255.0
OCP_NETMASK_S=24
OCP_DNS=192.168.77.11

OCP_GW_v6=fd03::11
OCP_NETMASK_v6=64

# echo ${SNO_IF_MAC} > /data/sno/sno.mac

mkdir -p ${BASE_DIR}/data/install
cd ${BASE_DIR}/data/install

/bin/rm -rf *.ign .openshift_install_state.json auth bootstrap manifests master*[0-9] worker*[0-9] *

cat << EOF > ${BASE_DIR}/data/install/install-config.yaml
apiVersion: v1
baseDomain: $SNO_BASE_DOMAIN
compute:
- name: worker
  replicas: 0
controlPlane:
  name: master
  replicas: 1
metadata:
  name: $SNO_CLUSTER_NAME
networking:
  # OVNKubernetes , OpenShiftSDN
  clusterNetwork:
  - cidr: 172.21.0.0/16
    hostPrefix: 23
  # - cidr: fd02::/48
  #   hostPrefix: 64
  machineNetwork:
  - cidr: $MACHINE_NETWORK
  # - cidr: 2001:DB8::/32
  serviceNetwork:
  - 172.22.0.0/16
  # - fd03::/112
platform:
  none: {}
pullSecret: '${PULL_SECRET}'
sshKey: |
$( cat ${BASE_DIR}/.ssh/id_rsa.pub | sed 's/^/ /g' )
additionalTrustBundle: |
$( cat /etc/crts/redhat.ren.ca.crt | sed 's/^/ /g' )
imageContentSources:
- mirrors:
  - ${INSTALL_IMAGE_REGISTRY}/ocp4/openshift4
  source: quay.io/openshift-release-dev/ocp-release
- mirrors:
  - ${INSTALL_IMAGE_REGISTRY}/ocp4/openshift4
  source: quay.io/openshift-release-dev/ocp-v4.0-art-dev
EOF

cat << EOF > ${BASE_DIR}/data/install/agent-config.yaml
apiVersion: v1alpha1
kind: AgentConfig
metadata:
  name: $SNO_CLUSTER_NAME
rendezvousIP: $MASTER_01_IP
additionalNTPSources:
- $NTP_SERVER
hosts:
  - hostname: $MASTER_01_HOSTNAME
    role: master
    rootDeviceHints:
      deviceName: "$MASTER_01_DISK"
    interfaces:
      - name: $MASTER_01_INTERFACE
        macAddress: $MASTER_01_INTERFACE_MAC
    networkConfig:
      interfaces:
        - name: $MASTER_01_INTERFACE
          type: ethernet
          state: up
          mac-address: $MASTER_01_INTERFACE_MAC
          ipv4:
            enabled: true
            address:
              - ip: $MASTER_01_IP
                prefix-length: $OCP_NETMASK_S
            dhcp: false
      dns-resolver:
        config:
          server:
            - $OCP_DNS
      routes:
        config:
          - destination: 0.0.0.0/0
            next-hop-address: $OCP_GW
            next-hop-interface: $MASTER_01_INTERFACE
            table-id: 254
EOF

/bin/cp -f ${BASE_DIR}/data/install/install-config.yaml ${BASE_DIR}/data/install/install-config.yaml.bak

openshift-install --dir=${BASE_DIR}/data/install agent create cluster-manifests

sudo bash -c "/bin/cp -f mirror/registries.conf /etc/containers/registries.conf.d/; chmod +r /etc/containers/registries.conf.d/*"

# /bin/cp -f /data/ocp4/ansible-helper/files/* ${BASE_DIR}/data/install/openshift/

sudo bash -c "cd /data/ocp4 ; bash image.registries.conf.sh quaylab.infra.wzhlab.top:5443 ;"

/bin/cp -f /data/ocp4/99-worker-container-registries.yaml ${BASE_DIR}/data/install/openshift
/bin/cp -f /data/ocp4/99-master-container-registries.yaml ${BASE_DIR}/data/install/openshift

cd ${BASE_DIR}/data/install/

# openshift-install --dir=${BASE_DIR}/data/install create ignition-configs

mkdir -p ~/.cache/agent/image_cache/
/bin/cp -f /data/ocp-$BUILDNUMBER/rhcos-live.x86_64.iso ~/.cache/agent/image_cache/coreos-x86_64.iso

openshift-install --dir=${BASE_DIR}/data/install agent create image --log-level=debug
# ......
# DEBUG Fetching image from OCP release (oc adm release info --image-for=machine-os-images --insecure=true --icsp-file=/tmp/icsp-file3636774741 quay.io/openshift-release-dev/ocp-release@sha256:96bf74ce789ccb22391deea98e0c5050c41b67cc17defbb38089d32226dba0b8)
# DEBUG The file was found in cache: /home/3node/.cache/agent/image_cache/coreos-x86_64.iso
# INFO Verifying cached file
# DEBUG extracting /coreos/coreos-x86_64.iso.sha256 to /tmp/cache1876698393, oc image extract --path /coreos/coreos-x86_64.iso.sha256:/tmp/cache1876698393 --confirm --icsp-file=/tmp/icsp-file455852761 quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:052130abddf741195b6753888cf8a00757dedeb7010f7d4dcc4b842b5bc705f6
# ......

coreos-installer iso ignition show agent.x86_64.iso > ignition.ign

# HTTP_PATH=http://192.168.7.11:8080/ignition

source /data/ocp4/acm.fn.sh

# we will create a user "wzh" with password "redhat"; on first boot you can
# log in from the console/ssh directly with this username and password,
# which makes troubleshooting and research easier
VAR_PWD_HASH="$(python3 -c 'import crypt,getpass; print(crypt.crypt("redhat"))')"

cat ${BASE_DIR}/data/install/ignition.ign \
  | jq --arg VAR "$VAR_PWD_HASH" --arg VAR_SSH "$NODE_SSH_KEY" '.passwd.users += [{ "name": "wzh", "system": true, "passwordHash": $VAR , "sshAuthorizedKeys": [ $VAR_SSH ], "groups": [ "adm", "wheel", "sudo", "systemd-journal" ] }]' \
  | jq '. += { "kernel_arguments" : { "should_exist" : [ "systemd.debug-shell=1" ] } }' \
  | jq -c . \
  > ${BASE_DIR}/data/install/ignition-iso.ign

coreos-installer iso ignition embed -f -i ignition-iso.ign agent.x86_64.iso

# VAR_IMAGE_VER=rhcos-410.86.202303200936-AnolisOS-0-live.x86_64.iso
```
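Before carrying the ISO to the kvm host, it doesn't hurt to confirm that the user and kernel-argument tweaks really landed in the image, by reading the embedded ignition back out:

```bash
# both queries reuse the jq paths written above
coreos-installer iso ignition show agent.x86_64.iso | jq '.passwd.users[].name'
coreos-installer iso ignition show agent.x86_64.iso | jq '.kernel_arguments'
```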

on kvm host ( 103 )

cleanup

```bash
create_lv() {
    var_vg=$1
    var_pool=$2
    var_lv=$3
    var_size=$4
    var_action=$5
    lvremove -f $var_vg/$var_lv
    # lvcreate -y -L $var_size -n $var_lv $var_vg
    if [ "$var_action" == "recreate" ]; then
      lvcreate --type thin -n $var_lv -V $var_size --thinpool $var_vg/$var_pool
      wipefs --all --force /dev/$var_vg/$var_lv
    fi
}

virsh destroy ocp4-acm-one-bootstrap
virsh undefine ocp4-acm-one-bootstrap
create_lv vgdata poolA lvacm-one-bootstrap 500G
create_lv vgdata poolA lvacm-one-bootstrap-data 500G

virsh destroy ocp4-acm-one-master-01
virsh undefine ocp4-acm-one-master-01
create_lv vgdata poolA lvacm-one-master-01 500G
create_lv vgdata poolA lvacm-one-master-01-data 500G

virsh destroy ocp4-acm-one-master-02
virsh undefine ocp4-acm-one-master-02
create_lv vgdata poolA lvacm-one-master-02 500G
create_lv vgdata poolA lvacm-one-master-02-data 500G

virsh destroy ocp4-acm-one-master-03
virsh undefine ocp4-acm-one-master-03
create_lv vgdata poolA lvacm-one-master-03 500G
create_lv vgdata poolA lvacm-one-master-03-data 500G
```
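Note that `create_lv` only deletes the LV when the fifth argument is omitted; passing `recreate` deletes it and builds a fresh thin LV on the pool. A short usage sketch (`lvdemo` is a made-up name):

```bash
# cleanup only: no 5th argument, so just the lvremove runs
create_lv vgdata poolA lvdemo 500G
# full cycle: remove, recreate as a thin LV, and wipe its signatures
create_lv vgdata poolA lvdemo 500G recreate
```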

begin

```bash
cat << EOF >> /etc/sysctl.d/99-wzh-sysctl.conf
vm.overcommit_memory = 1
EOF
sysctl --system

# create the virtual network for the lab
mkdir -p /data/kvm
cd /data/kvm

cat << 'EOF' > /data/kvm/bridge.sh
#!/usr/bin/env bash

PUB_CONN='eno1'
PUB_IP='172.21.6.103/24'
PUB_GW='172.21.6.254'
PUB_DNS='172.21.1.1'

nmcli con down "$PUB_CONN"
nmcli con delete "$PUB_CONN"
nmcli con down baremetal
nmcli con delete baremetal
# RHEL 8.1 appends the word "System" in front of the connection, delete in case it exists
nmcli con down "System $PUB_CONN"
nmcli con delete "System $PUB_CONN"
nmcli connection add ifname baremetal type bridge con-name baremetal ipv4.method 'manual' \
    ipv4.address "$PUB_IP" \
    ipv4.gateway "$PUB_GW" \
    ipv4.dns "$PUB_DNS"

nmcli con add type bridge-slave ifname "$PUB_CONN" master baremetal
nmcli con down "$PUB_CONN";pkill dhclient;dhclient baremetal
nmcli con up baremetal
EOF

bash /data/kvm/bridge.sh

nmcli con mod baremetal +ipv4.addresses "192.168.7.103/24"
nmcli con up baremetal

cat << EOF > /root/.ssh/config
StrictHostKeyChecking no
UserKnownHostsFile=/dev/null
EOF

pvcreate -y /dev/vdb
vgcreate vgdata /dev/vdb

# https://access.redhat.com/articles/766133
lvcreate -y -n poolA -L 500G vgdata
lvcreate -y -n poolA_meta -L 10G vgdata
lvconvert -y --thinpool vgdata/poolA --poolmetadata vgdata/poolA_meta
lvextend -l +100%FREE vgdata/poolA

mkdir -p /data/kvm/one/
scp root@192.168.77.11:/home/3node/data/install/agent.x86_64.iso /data/kvm/one/

create_lv() {
    var_vg=$1
    var_pool=$2
    var_lv=$3
    var_size=$4
    var_action=$5
    lvremove -f $var_vg/$var_lv
    # lvcreate -y -L $var_size -n $var_lv $var_vg
    if [ "$var_action" == "recreate" ]; then
      lvcreate --type thin -n $var_lv -V $var_size --thinpool $var_vg/$var_pool
      wipefs --all --force /dev/$var_vg/$var_lv
    fi
}

SNO_MEM=64

virsh destroy ocp4-acm-one-master-01
virsh undefine ocp4-acm-one-master-01

create_lv vgdata poolA lvacm-one-master-01 500G recreate
create_lv vgdata poolA lvacm-one-master-01-data 500G recreate

virt-install --name=ocp4-acm-one-master-01 --vcpus=16 --ram=$(($SNO_MEM*1024)) \
  --cpu=host-model \
  --disk path=/dev/vgdata/lvacm-one-master-01,device=disk,bus=virtio,format=raw \
  --disk path=/dev/vgdata/lvacm-one-master-01-data,device=disk,bus=virtio,format=raw \
  --os-variant rhel8.3 --network bridge=baremetal,model=virtio,mac=52:54:00:12:A1:01 \
  --graphics vnc,port=59003 --noautoconsole \
  --boot menu=on --cdrom /data/kvm/one/agent.x86_64.iso
```
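Before booting the VM it doesn't hurt to double-check the host; the bridge and the thin pool can be inspected with plain nmcli/lvs:

```bash
# the bridge should carry both the public and the 192.168.7.103 address
nmcli con show baremetal | grep ipv4.addresses
# the thin pool poolA and the thin LVs should be listed
lvs vgdata
```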

on helper to see result

For an unknown reason, the VM shuts down instead of rebooting; you have to power it on manually.
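When that happens, start it again from the kvm host; for example:

```bash
# on the kvm host ( 103 ): power the node back on after the agent
# installer has written the disk and shut the VM down
virsh start ocp4-acm-one-master-01
```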

```bash
cd ${BASE_DIR}/data/install
export KUBECONFIG=${BASE_DIR}/data/install/auth/kubeconfig
echo "export KUBECONFIG=${BASE_DIR}/data/install/auth/kubeconfig" >> ~/.bashrc
# oc completion bash | sudo tee /etc/bash_completion.d/openshift > /dev/null

cd ${BASE_DIR}/data/install
openshift-install --dir=${BASE_DIR}/data/install agent wait-for bootstrap-complete --log-level=debug
# ......
# DEBUG RendezvousIP from the AgentConfig 192.168.77.43
# INFO Bootstrap Kube API Initialized
# INFO Bootstrap configMap status is complete
# INFO cluster bootstrap is complete

cd ${BASE_DIR}/data/install
openshift-install --dir=${BASE_DIR}/data/install agent wait-for install-complete --log-level=debug
# ......
# INFO Install complete!
# INFO To access the cluster as the system:admin user when using 'oc', run
# INFO     export KUBECONFIG=/home/3node/data/install/auth/kubeconfig
# INFO Access the OpenShift web-console here: https://console-openshift-console.apps.osp-demo.wzhlab.top
# INFO Login to the console with user: "kubeadmin", and password: "UmfI2-99uAb-BRdaS-LLjQ9"
```
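Once the installer reports `Install complete!`, a quick sanity check from the helper with plain `oc` commands (nothing specific to this lab) confirms the node and the operators are healthy:

```bash
oc get nodes
oc get clusterversion
oc get clusteroperators
```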

password login and oc config

```bash
# init setting for helper node
cat << EOF > ~/.ssh/config
StrictHostKeyChecking no
UserKnownHostsFile=/dev/null
EOF
chmod 600 ~/.ssh/config

# ssh core@*****
# sudo -i
# # change password for root
# echo 'redhat' | passwd --stdin root
# sed -i "s|^PasswordAuthentication no$|PasswordAuthentication yes|g" /etc/ssh/sshd_config
# sed -i "s|^PermitRootLogin no$|PermitRootLogin yes|g" /etc/ssh/sshd_config
# sed -i "s|^#ClientAliveInterval 180$|ClientAliveInterval 1800|g" /etc/ssh/sshd_config
# systemctl restart sshd

# # set env, so oc can be used
# cat << EOF >> ~/.bashrc
# export KUBECONFIG=/etc/kubernetes/static-pod-resources/kube-apiserver-certs/secrets/node-kubeconfigs/localhost.kubeconfig
# RET=`oc config use-context system:admin`
# EOF

cat > ${BASE_DIR}/data/install/crack.txt << EOF
echo redhat | sudo passwd --stdin root

sudo sed -i "s|^PasswordAuthentication no$|PasswordAuthentication yes|g" /etc/ssh/sshd_config
sudo sed -i "s|^PermitRootLogin no$|PermitRootLogin yes|g" /etc/ssh/sshd_config
sudo sed -i "s|^#ClientAliveInterval 180$|ClientAliveInterval 1800|g" /etc/ssh/sshd_config

sudo systemctl restart sshd

sudo sh -c 'echo "export KUBECONFIG=/etc/kubernetes/static-pod-resources/kube-apiserver-certs/secrets/node-kubeconfigs/localhost.kubeconfig" >> /root/.bashrc'

sudo sh -c 'echo "RET=\\\`oc config use-context system:admin\\\`" >> /root/.bashrc'
EOF

for i in 23 24 25
do
  ssh core@192.168.7.$i < ${BASE_DIR}/data/install/crack.txt
done
```
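If the script went through, password login for root should now work, and the localhost kubeconfig appended to /root/.bashrc lets `oc` run directly on the node; a quick check (the IP follows the loop above):

```bash
# log in with the password set by crack.txt, then query the API locally
ssh root@192.168.7.23
oc get nodes
```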

from other host

```bash
# https://unix.stackexchange.com/questions/230084/send-the-password-through-stdin-in-ssh-copy-id
dnf install -y sshpass

for i in 23 24 25
do
  sshpass -p 'redhat' ssh-copy-id root@192.168.7.$i
done
```

poweroff

```bash
for i in 23 24 25
do
  ssh root@192.168.7.$i poweroff
done
```

poweron

```bash
virsh start ocp4-acm-one-master-01
virsh start ocp4-acm-one-master-02
virsh start ocp4-acm-one-master-03
```

back and merge kubeconfig

```bash
mkdir -p ~/.kube/bak/

var_date=$(date '+%Y-%m-%d-%H%M')
/bin/cp -f /data/install/auth/kubeconfig ~/.kube/bak/kubeconfig-$var_date
/bin/cp -f /data/install/auth/kubeadmin-password ~/.kube/bak/kubeadmin-password-$var_date

sed "s/admin/admin\/$SNO_CLUSTER_NAME/g" /data/install/auth/kubeconfig > /tmp/config.new

# https://medium.com/@jacobtomlinson/how-to-merge-kubernetes-kubectl-config-files-737b61bd517d
/bin/cp -f ~/.kube/config ~/.kube/config.bak && KUBECONFIG=~/.kube/config:/tmp/config.new kubectl config view --flatten > /tmp/config && /bin/mv -f /tmp/config ~/.kube/config

unset KUBECONFIG
```
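With the merge done, the new cluster shows up as another context in the default kubeconfig; the context name below is an assumption based on the `sed` rewrite above:

```bash
kubectl config get-contexts
# switch to the merged cluster
kubectl config use-context admin/$SNO_CLUSTER_NAME
```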

add worker node

Now that the single node is installed, the next step is to add a worker node to it, turning the single-node cluster into a cluster with one master.

```bash
# first, let's pin ingress to the master
oc label node acm-demo-hub-master ocp-ingress-run="true"

oc patch ingresscontroller default -n openshift-ingress-operator --type=merge --patch='{"spec":{"nodePlacement":{"nodeSelector": {"matchLabels":{"ocp-ingress-run":"true"}}}}}'

# this is a test env, so we don't need multiple ingress replicas
oc patch --namespace=openshift-ingress-operator --patch='{"spec": {"replicas": 1}}' --type=merge ingresscontroller/default

oc get -n openshift-ingress-operator ingresscontroller/default -o yaml

# then fetch the worker ignition file, boot the worker node, and add it to the cluster
oc extract -n openshift-machine-api secret/worker-user-data --keys=userData --to=- > /var/www/html/ignition/sno-worker.ign

HELP_SERVER=192.168.7.11

# define the node information for the new worker
SNO_IP=192.168.7.16
SNO_GW=192.168.7.11
SNO_NETMAST=255.255.255.0
SNO_HOSTNAME=acm-demo-hub-worker-01
SNO_IF=enp1s0
SNO_DNS=192.168.7.11
SNO_DISK=/dev/vda
SNO_MEM=16

BOOT_ARG=" ip=$SNO_IP::$SNO_GW:$SNO_NETMAST:$SNO_HOSTNAME:$SNO_IF:none nameserver=$SNO_DNS coreos.inst.install_dev=${SNO_DISK##*/} coreos.inst.ignition_url=http://$HELP_SERVER:8080/ignition/sno-worker.ign"

/bin/cp -f /data/ocp4/rhcos-live.x86_64.iso sno.iso

coreos-installer iso kargs modify -a "$BOOT_ARG" sno.iso

# go to kvm host ( 103 )
scp root@192.168.7.11:/data/install/sno.iso /data/kvm/

virsh destroy ocp4-acm-hub-worker01
virsh undefine ocp4-acm-hub-worker01

create_lv() {
    var_vg=$1
    var_pool=$2
    var_lv=$3
    var_size=$4
    var_action=$5
    lvremove -f $var_vg/$var_lv
    # lvcreate -y -L $var_size -n $var_lv $var_vg
    if [ "$var_action" == "recreate" ]; then
      lvcreate --type thin -n $var_lv -V $var_size --thinpool $var_vg/$var_pool
      wipefs --all --force /dev/$var_vg/$var_lv
    fi
}

create_lv vgdata poolA lvacmhub-worker01 500G recreate
# create_lv vgdata poolA lvacmhub-worker01-data 500G remove

virt-install --name=ocp4-acm-hub-worker01 --vcpus=16 --ram=$(($SNO_MEM*1024)) \
  --cpu=host-model \
  --disk path=/dev/vgdata/lvacmhub-worker01,device=disk,bus=virtio,format=raw \
  `# --disk path=/dev/vgdata/lvacmhub-data,device=disk,bus=virtio,format=raw` \
  --os-variant rhel8.3 --network bridge=baremetal,model=virtio \
  --graphics vnc,port=59003 \
  --boot menu=on --cdrom /data/kvm/sno.iso

# after the second boot,
# go back to the helper
oc get csr

oc get csr -ojson | jq -r '.items[] | select(.status == {} ) | .metadata.name' | xargs oc adm certificate approve
```
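One thing worth knowing about the CSR step: a joining worker raises two rounds of CSRs (the client certificate first, then a serving certificate once the kubelet is up), so the approve command usually needs to be repeated until the node reports Ready:

```bash
# re-run until no CSRs are Pending and the worker shows Ready
oc get csr -ojson | jq -r '.items[] | select(.status == {} ) | .metadata.name' | xargs oc adm certificate approve
oc get nodes
```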

end