coreos 启动分析
我们知道,CoreOS 的内核和根文件系统是一起打包升级的,也就是所谓的 A/B 切换升级。那么它到底是怎么实现的呢?现在我们就来分析一下。
视频讲解
首先我们分析一下 /boot 分区
ls -hl /boot/loader/entries/
total 4.0K
-rw-r--r--. 1 root root 629 Apr 9 11:57 ostree-1-rhcos.conf
-rw-r--r--. 1 root root 630 Apr 9 11:57 ostree-2-rhcos.conf
cat /boot/loader/entries/*.conf
title Red Hat Enterprise Linux CoreOS 49.84.202110081407-0 (Ootpa) (ostree:1)
version 1
options random.trust_cpu=on console=tty0 console=ttyS0,115200n8 ignition.platform.id=metal $ignition_firstboot ostree=/ostree/boot.0/rhcos/a10b07df1aa66c008cd3b9acb17d765f0755702cadfa0090155dced4d2e9bfe0/0 ip=enp1s0:dhcp root=UUID=0a0d4701-04bf-45a2-8b9b-f761542a617a rw rootflags=prjquota
linux /ostree/rhcos-a10b07df1aa66c008cd3b9acb17d765f0755702cadfa0090155dced4d2e9bfe0/vmlinuz-4.18.0-305.19.1.el8_4.x86_64
initrd /ostree/rhcos-a10b07df1aa66c008cd3b9acb17d765f0755702cadfa0090155dced4d2e9bfe0/initramfs-4.18.0-305.19.1.el8_4.x86_64.img
title Red Hat Enterprise Linux CoreOS 410.84.202203081640-0 (Ootpa) (ostree:0)
version 2
options random.trust_cpu=on console=tty0 console=ttyS0,115200n8 ignition.platform.id=metal $ignition_firstboot ostree=/ostree/boot.0/rhcos/838cd9a10892dbd5e32ffdbec249a4c0db18f6d1c56f416f7a59a2f806f55941/0 ip=enp1s0:dhcp root=UUID=0a0d4701-04bf-45a2-8b9b-f761542a617a rw rootflags=prjquota
linux /ostree/rhcos-838cd9a10892dbd5e32ffdbec249a4c0db18f6d1c56f416f7a59a2f806f55941/vmlinuz-4.18.0-305.40.1.el8_4.x86_64
initrd /ostree/rhcos-838cd9a10892dbd5e32ffdbec249a4c0db18f6d1c56f416f7a59a2f806f55941/initramfs-4.18.0-305.40.1.el8_4.x86_64.img
我们可以清晰地看到,这里面定义了 2 个启动入口,并且每个入口都对应 /boot/ostree/ 下的一个 rhcos-<哈希值> 目录,该目录中存放着这个入口所使用的内核(vmlinuz)和 initramfs。
参考文档:
- https://access.redhat.com/solutions/5847011
再分析一下 mount
lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sr0 11:0 1 104M 0 rom
vda 252:0 0 120G 0 disk
├─vda1 252:1 0 1M 0 part
├─vda2 252:2 0 127M 0 part
├─vda3 252:3 0 384M 0 part /boot
└─vda4 252:4 0 119.5G 0 part /sysroot
mount | grep vda4
/dev/vda4 on /sysroot type xfs (ro,relatime,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota)
/dev/vda4 on / type xfs (rw,relatime,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota)
/dev/vda4 on /etc type xfs (rw,relatime,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota)
/dev/vda4 on /usr type xfs (ro,relatime,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota)
/dev/vda4 on /var type xfs (rw,relatime,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota)
/dev/vda4 on /var/lib/containers/storage/overlay type xfs (rw,relatime,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota)
/dev/vda4 on /var/lib/kubelet/pods/80389395-c0f4-4342-a2ee-2b8c31dbbdbc/volume-subpaths/etc/tuned/1 type xfs (rw,relatime,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota)
/dev/vda4 on /var/lib/kubelet/pods/80389395-c0f4-4342-a2ee-2b8c31dbbdbc/volume-subpaths/etc/tuned/2 type xfs (rw,relatime,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota)
/dev/vda4 on /var/lib/kubelet/pods/80389395-c0f4-4342-a2ee-2b8c31dbbdbc/volume-subpaths/etc/tuned/3 type xfs (rw,relatime,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota)
/dev/vda4 on /var/lib/kubelet/pods/80389395-c0f4-4342-a2ee-2b8c31dbbdbc/volume-subpaths/etc/tuned/4 type xfs (rw,relatime,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota)
/dev/vda4 on /var/lib/kubelet/pods/80389395-c0f4-4342-a2ee-2b8c31dbbdbc/volume-subpaths/etc/tuned/5 type xfs (rw,relatime,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota)
我们非常困惑,mount命令的输出显示,vda4被挂载了很多次,每次都是不同的路径,这是为什么呢?
cat /proc/1/mountinfo | grep vda4
99 102 252:4 / /sysroot ro,relatime - xfs /dev/vda4 rw,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota
102 1 252:4 /ostree/deploy/rhcos/deploy/b1df1247e3ad53173c1e13a913ec645d48a22f6a294e70e2ca5bda8c31f78d78.0 / rw,relatime shared:1 - xfs /dev/vda4 rw,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota
103 102 252:4 /ostree/deploy/rhcos/deploy/b1df1247e3ad53173c1e13a913ec645d48a22f6a294e70e2ca5bda8c31f78d78.0/etc /etc rw,relatime shared:2 - xfs /dev/vda4 rw,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota
104 102 252:4 /ostree/deploy/rhcos/deploy/b1df1247e3ad53173c1e13a913ec645d48a22f6a294e70e2ca5bda8c31f78d78.0/usr /usr ro,relatime shared:3 - xfs /dev/vda4 rw,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota
133 102 252:4 /ostree/deploy/rhcos/var /var rw,relatime shared:4 - xfs /dev/vda4 rw,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota
299 133 252:4 /ostree/deploy/rhcos/var/lib/containers/storage/overlay /var/lib/containers/storage/overlay rw,relatime - xfs /dev/vda4 rw,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota
7886 133 252:4 /ostree/deploy/rhcos/deploy/b1df1247e3ad53173c1e13a913ec645d48a22f6a294e70e2ca5bda8c31f78d78.0/etc/modprobe.d /var/lib/kubelet/pods/80389395-c0f4-4342-a2ee-2b8c31dbbdbc/volume-subpaths/etc/tuned/1 rw,relatime shared:2 - xfs /dev/vda4 rw,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota
5920 133 252:4 /ostree/deploy/rhcos/deploy/b1df1247e3ad53173c1e13a913ec645d48a22f6a294e70e2ca5bda8c31f78d78.0/etc/sysconfig /var/lib/kubelet/pods/80389395-c0f4-4342-a2ee-2b8c31dbbdbc/volume-subpaths/etc/tuned/2 rw,relatime shared:2 - xfs /dev/vda4 rw,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota
7429 133 252:4 /ostree/deploy/rhcos/deploy/b1df1247e3ad53173c1e13a913ec645d48a22f6a294e70e2ca5bda8c31f78d78.0/etc/sysctl.d /var/lib/kubelet/pods/80389395-c0f4-4342-a2ee-2b8c31dbbdbc/volume-subpaths/etc/tuned/3 rw,relatime shared:2 - xfs /dev/vda4 rw,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota
7965 133 252:4 /ostree/deploy/rhcos/deploy/b1df1247e3ad53173c1e13a913ec645d48a22f6a294e70e2ca5bda8c31f78d78.0/etc/sysctl.conf /var/lib/kubelet/pods/80389395-c0f4-4342-a2ee-2b8c31dbbdbc/volume-subpaths/etc/tuned/4 rw,relatime shared:2 - xfs /dev/vda4 rw,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota
8491 133 252:4 /ostree/deploy/rhcos/deploy/b1df1247e3ad53173c1e13a913ec645d48a22f6a294e70e2ca5bda8c31f78d78.0/etc/systemd /var/lib/kubelet/pods/80389395-c0f4-4342-a2ee-2b8c31dbbdbc/volume-subpaths/etc/tuned/5 rw,relatime shared:2 - xfs /dev/vda4 rw,seclabel,attr2,inode64,logbufs=8,logbsize=32k,prjquota
答案在/proc/1/mountinfo中,我们来仔细分析一下里面的内容,特别是根文件系统的挂载。
- 我们看第一行,/dev/vda4说的是设备,xfs说的是这个设备上的文件系统,/ 说的是设备上的本来的目录, /sysroot 说的是挂载到当前进程空间的什么目录
- 我们再来看第二行,/dev/vda4说的是设备,xfs说的是这个设备上的文件系统,/ostree/deploy/rhcos/deploy/b1df1247e3ad53173c1e13a913ec645d48a22f6a294e70e2ca5bda8c31f78d78.0 说的是设备上的本来的目录, / 说的是挂载到当前进程空间的什么目录
所以,总结下来,/dev/vda4 上面的目录结构,和我们一般的目录结构不一样;系统启动以后,关键的路径(/、/etc、/usr、/var 等)是把设备上的不同子目录分别挂载到对应位置,重新安排就位的。
find /sysroot -maxdepth 3
/sysroot
/sysroot/boot
/sysroot/ostree
/sysroot/ostree/repo
/sysroot/ostree/repo/config
/sysroot/ostree/repo/tmp
/sysroot/ostree/repo/extensions
/sysroot/ostree/repo/state
/sysroot/ostree/repo/refs
/sysroot/ostree/repo/objects
/sysroot/ostree/repo/.lock
/sysroot/ostree/deploy
/sysroot/ostree/deploy/rhcos
/sysroot/ostree/boot.0.1
/sysroot/ostree/boot.0.1/rhcos
/sysroot/ostree/boot.0
/sysroot/.coreos-aleph-version.json
调查和文件系统挂载(mount)相关的 systemd 单元
systemctl cat ostree-remount.service
[Unit]
Description=OSTree Remount OS/ Bind Mounts
Documentation=man:ostree(1)
DefaultDependencies=no
ConditionKernelCommandLine=ostree
OnFailure=emergency.target
Conflicts=umount.target
# Run after core mounts
After=-.mount var.mount
After=systemd-remount-fs.service
# But we run *before* most other core bootup services that need write access to /etc and /var
Before=local-fs.target umount.target
Before=systemd-random-seed.service plymouth-read-write.service systemd-journal-flush.service
Before=systemd-tmpfiles-setup.service
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=/usr/lib/ostree/ostree-remount
StandardInput=null
StandardOutput=journal
StandardError=journal+console
[Install]
WantedBy=local-fs.target
systemctl list-unit-files | grep mount
proc-sys-fs-binfmt_misc.automount static
boot.mount generated
dev-hugepages.mount static
dev-mqueue.mount static
proc-fs-nfsd.mount static
proc-sys-fs-binfmt_misc.mount static
run-vmblock\x2dfuse.mount disabled
sys-fs-fuse-connections.mount static
sys-kernel-config.mount static
sys-kernel-debug.mount static
tmp.mount disabled
var-lib-nfs-rpc_pipefs.mount static
var.mount generated
dracut-mount.service static
dracut-pre-mount.service static
nfs-mountd.service static
ostree-remount.service disabled
systemd-remount-fs.service static
umount.target static
systemctl cat dracut-mount.service
# /usr/lib/systemd/system/../../dracut/modules.d/98dracut-systemd/dracut-mount.service
# This file is part of dracut.
#
# See dracut.bootup(7) for details
[Unit]
Description=dracut mount hook
Documentation=man:dracut-mount.service(8)
After=initrd-root-fs.target initrd-parse-etc.service
After=dracut-initqueue.service dracut-pre-mount.service
ConditionPathExists=/usr/lib/initrd-release
ConditionDirectoryNotEmpty=|/lib/dracut/hooks/mount
ConditionKernelCommandLine=|rd.break=mount
DefaultDependencies=no
Conflicts=shutdown.target emergency.target
[Service]
Environment=DRACUT_SYSTEMD=1
Environment=NEWROOT=/sysroot
Type=oneshot
ExecStart=-/bin/dracut-mount
StandardInput=null
StandardOutput=syslog
StandardError=syslog+console
KillMode=process
RemainAfterExit=yes
# Bash ignores SIGTERM, so we send SIGHUP instead, to ensure that bash
# terminates cleanly.
KillSignal=SIGHUP
参考文档:
- https://man7.org/linux/man-pages/man7/dracut.bootup.7.html
- https://ostreedev.github.io/ostree/adapting-existing/#booting-and-initramfs-technology