Deploying Kubernetes v1.26.0 on the containerd runtime

1. Preparation
OS | CPU (cores) | RAM (GB) | IP | NIC | Hostname |
---|---|---|---|---|---|
centos7 | 2 | 2 | 192.168.84.128 | NAT | master |
centos7 | 2 | 2 | 192.168.84.129 | NAT | node |
Environment configuration (all nodes)
- Set the hostnames

```bash
# on the master node
hostnamectl set-hostname master
bash

# on the worker node
hostnamectl set-hostname node
bash
```

- Configure the hosts mapping

```bash
cat >> /etc/hosts << EOF
192.168.84.128 master
192.168.84.129 node
EOF
```

- Disable the firewall

```bash
systemctl stop firewalld
systemctl disable firewalld
```

- Disable SELinux

```bash
setenforce 0
sed -i "s/SELINUX=enforcing/SELINUX=disabled/g" /etc/selinux/config
```

- Disable swap

```bash
swapoff -a
sed -i 's/.*swap.*/#&/' /etc/fstab
```

- Enable IPv4 forwarding and let iptables see bridged traffic

```bash
# Enable IPv4 forwarding and let iptables see bridged traffic
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF

sudo modprobe overlay
sudo modprobe br_netfilter
lsmod | grep br_netfilter   # verify the br_netfilter module is loaded

# Set the required sysctl parameters; they persist across reboots
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables  = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward                 = 1
EOF

# Apply the sysctl parameters without rebooting
sudo sysctl --system
```

- Configure time synchronization

```bash
yum install ntpdate -y
ntpdate ntp1.aliyun.com
```
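ntpdate only performs a one-shot sync. If you want the clocks to stay aligned over time, one option (an addition of mine, not in the original steps) is an hourly cron entry:

```bash
# Hypothetical helper: re-sync against ntp1.aliyun.com every hour from root's crontab
(crontab -l 2>/dev/null; echo '0 * * * * /usr/sbin/ntpdate ntp1.aliyun.com > /dev/null 2>&1') | crontab -
crontab -l   # confirm the entry is present
```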
Install containerd (all nodes)
- Download containerd

Download link

I'm using version 1.6.19 here.

Upload the archive to all nodes.

- Install

```bash
tar Cvzxf /usr/local containerd-1.6.19-linux-amd64.tar.gz

# Run containerd via systemd
vi /etc/systemd/system/containerd.service
```

```
# Copyright The containerd Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[Unit]
Description=containerd container runtime
Documentation=https://containerd.io
After=network.target local-fs.target

[Service]
#uncomment to enable the experimental sbservice (sandboxed) version of containerd/cri integration
#Environment="ENABLE_CRI_SANDBOXES=sandboxed"
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/containerd

Type=notify
Delegate=yes
KillMode=process
Restart=always
RestartSec=5
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNPROC=infinity
LimitCORE=infinity
LimitNOFILE=infinity
# Comment TasksMax if your systemd version does not supports it.
# Only systemd 226 and above support this version.
TasksMax=infinity
OOMScoreAdjust=-999

[Install]
WantedBy=multi-user.target
```

```bash
# Reload the configuration and start containerd
systemctl daemon-reload
systemctl enable --now containerd

# Verify
ctr version

# Generate the default configuration file
mkdir /etc/containerd
containerd config default > /etc/containerd/config.toml
systemctl restart containerd
```
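A quick sanity check (not part of the original steps) that containerd is running and its CRI plugin loaded cleanly:

```bash
systemctl status containerd --no-pager
ctr plugins ls | grep cri    # the io.containerd.grpc.v1 cri plugin should report "ok"
```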
Install runc (all nodes)
- Download runc

Download link

- Install

```bash
install -m 755 runc.amd64 /usr/local/sbin/runc

# Verify
runc -v
```
Install the CNI plugins
- Download and upload to the servers

Download link

- Install

```bash
mkdir -p /opt/cni/bin
tar Cxzvf /opt/cni/bin cni-plugins-linux-amd64-v1.2.0.tgz
```
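A quick check (not in the original) that the plugins were unpacked where containerd expects them:

```bash
ls /opt/cni/bin   # should list bridge, host-local, loopback, portmap, and friends
```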
Configure a registry mirror (all nodes)

```bash
# Reference: https://github.com/containerd/containerd/blob/main/docs/cri/config.md#registry-configuration
# Set config_path = "/etc/containerd/certs.d"
sed -i 's/config_path\ =.*/config_path = \"\/etc\/containerd\/certs.d\"/g' /etc/containerd/config.toml

mkdir /etc/containerd/certs.d/docker.io -p
cat > /etc/containerd/certs.d/docker.io/hosts.toml << EOF
server = "https://docker.io"

[host."https://vh3bm52y.mirror.aliyuncs.com"]
  capabilities = ["pull", "resolve"]
EOF

systemctl daemon-reload && systemctl restart containerd
```
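To test the mirror, you can ask containerd to pull a small image through the certs.d configuration (a sketch; note that plain `ctr` ignores the CRI registry settings unless you point it at the hosts directory explicitly):

```bash
ctr images pull --hosts-dir "/etc/containerd/certs.d" docker.io/library/hello-world:latest
```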
cgroup driver (all nodes)

```bash
# Change SystemdCgroup = false to SystemdCgroup = true
sed -i 's/SystemdCgroup\ =\ false/SystemdCgroup\ =\ true/g' /etc/containerd/config.toml

# Change sandbox_image = "k8s.gcr.io/pause:3.6" to sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.9"
sed -i 's/sandbox_image\ =.*/sandbox_image\ =\ "registry.aliyuncs.com\/google_containers\/pause:3.9"/g' /etc/containerd/config.toml
grep sandbox_image /etc/containerd/config.toml

systemctl daemon-reload
systemctl restart containerd
```
Install crictl (all nodes)
- Download

Download link

I chose version v1.26.0 here.

- Install

Upload the archive to the servers.

```bash
tar -vzxf crictl-v1.26.0-linux-amd64.tar.gz
mv crictl /usr/local/bin/

cat >> /etc/crictl.yaml << EOF
runtime-endpoint: unix:///var/run/containerd/containerd.sock
image-endpoint: unix:///var/run/containerd/containerd.sock
timeout: 10
debug: true
EOF

systemctl restart containerd
```
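To confirm crictl can actually reach containerd over the configured socket (a quick check, not part of the original steps):

```bash
crictl info     # dumps the runtime status and CRI config as JSON
crictl ps -a    # lists containers; empty on a fresh node
```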
Install kubelet, kubeadm, kubectl (all nodes)

```bash
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF

yum install --nogpgcheck kubelet kubeadm kubectl -y
systemctl enable kubelet
```
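The command above installs whatever the newest packages in the repo are. Since this guide targets the 1.26 release, you may prefer to pin the versions (a variant I'm adding, not in the original; the package naming assumes the Aliyun el7 repo):

```bash
yum install --nogpgcheck -y kubelet-1.26.0 kubeadm-1.26.0 kubectl-1.26.0
systemctl enable kubelet
```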
Configure IPVS (all nodes)

```bash
# Reference: https://cloud.tencent.com/developer/article/1717552

# Install ipset and ipvsadm
yum install ipset ipvsadm -y

# IPVS is already part of the mainline kernel; for kube-proxy to use IPVS,
# the following kernel modules need to be loaded (the usual set on CentOS 7)
cat > /etc/sysconfig/modules/ipvs.modules << EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack
EOF
chmod 755 /etc/sysconfig/modules/ipvs.modules
bash /etc/sysconfig/modules/ipvs.modules
lsmod | grep -e ip_vs -e nf_conntrack

cat >> /etc/sysconfig/kubelet << EOF
# KUBELET_CGROUP_ARGS="--cgroup-driver=systemd"
KUBE_PROXY_MODE="ipvs"
EOF
```
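Note that the KUBE_PROXY_MODE line above is just an environment file entry that kubeadm's kube-proxy does not read on its own. If you want kube-proxy to really run in IPVS mode, one common approach (an addition of mine, not in the original) is to append a KubeProxyConfiguration document to the kubeadm.yaml generated in the next section, for example:

```bash
# Assumes kubeadm.yaml is the init config produced by "kubeadm config print init-defaults" below
cat >> kubeadm.yaml << EOF
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs
EOF
```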
Initialize the master node
- Generate the default config file

```bash
kubeadm config print init-defaults > kubeadm.yaml
```

- Edit the kubeadm.yaml file

```bash
vim kubeadm.yaml
```

```yaml
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.84.128   # change to the host IP
  bindPort: 6443
nodeRegistration:
  criSocket: unix:///var/run/containerd/containerd.sock
  imagePullPolicy: IfNotPresent
  name: master                       # change to the hostname
  taints: null
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.aliyuncs.com/google_containers   # change to the Aliyun image repository
kind: ClusterConfiguration
kubernetesVersion: 1.26.0            # set this to match your kubeadm version
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
  podSubnet: 10.244.0.0/16           ## set the pod network CIDR
scheduler: {}
### Added: configure the kubelet cgroup driver as systemd
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
cgroupDriver: systemd
```
- Pull the images

```bash
kubeadm config images pull --image-repository=registry.aliyuncs.com/google_containers --kubernetes-version=v1.26.2
```

```
[config/images] Pulled registry.aliyuncs.com/google_containers/kube-apiserver:v1.26.2
[config/images] Pulled registry.aliyuncs.com/google_containers/kube-controller-manager:v1.26.2
[config/images] Pulled registry.aliyuncs.com/google_containers/kube-scheduler:v1.26.2
[config/images] Pulled registry.aliyuncs.com/google_containers/kube-proxy:v1.26.2
[config/images] Pulled registry.aliyuncs.com/google_containers/pause:3.9
[config/images] Pulled registry.aliyuncs.com/google_containers/etcd:3.5.6-0
[config/images] Pulled registry.aliyuncs.com/google_containers/coredns:v1.9.3
```
- Initialize

```bash
kubeadm init --config kubeadm.yaml
```

After a while, output like the following indicates the master node has been initialized successfully:
```
[init] Using Kubernetes version: v1.26.2
[preflight] Running pre-flight checks
	[WARNING Service-Kubelet]: kubelet service is not enabled, please run 'systemctl enable kubelet.service'
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local master] and IPs [10.96.0.1 192.168.84.128]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [localhost master] and IPs [192.168.84.128 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [localhost master] and IPs [192.168.84.128 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[apiclient] All control plane components are healthy after 7.001191 seconds
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Skipping phase. Please see --upload-certs
[mark-control-plane] Marking the node master as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
[mark-control-plane] Marking the node master as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
[bootstrap-token] Using token: abcdef.0123456789abcdef
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to get nodes
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] Configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] Configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

  export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.168.84.128:6443 --token abcdef.0123456789abcdef \
	--discovery-token-ca-cert-hash sha256:13dbb19283360eed18d55d1ee758e183ad79f123dc2598afaa76eac0dc0d9890
```
- Create the kubeconfig directory

```bash
[root@master ~]# mkdir -p $HOME/.kube
[root@master ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config
```
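A quick sanity check that the kubeconfig works (not in the original):

```bash
kubectl cluster-info   # should print the control plane and CoreDNS endpoints
```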
Join the node to the cluster
```bash
[root@node ~]# kubeadm join 192.168.84.128:6443 --token abcdef.0123456789abcdef --discovery-token-ca-cert-hash sha256:7d52da1b42af69666db3483b30a389ab143a1a199b500843741dfd5f180bcb3f
```
Output like the following indicates the node has joined the cluster successfully:
```
[preflight] Running pre-flight checks
	[WARNING Service-Kubelet]: kubelet service is not enabled, please run 'systemctl enable kubelet.service'
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...

This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.

Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
```
Verification (master node)
```bash
[root@master ~]# kubectl get nodes
```

Output:

```
NAME     STATUS     ROLES           AGE     VERSION
master   NotReady   control-plane   3m25s   v1.26.0
node     NotReady   <none>          118s    v1.26.0
```

```
[root@master ~]# kubectl get pod -A
NAMESPACE     NAME                             READY   STATUS    RESTARTS   AGE
kube-system   coredns-5bbd96d687-7d6dl         0/1     Pending   0          2m47s
kube-system   coredns-5bbd96d687-frz87         0/1     Pending   0          2m47s
kube-system   etcd-master                      1/1     Running   0          3m
kube-system   kube-apiserver-master            1/1     Running   0          3m
kube-system   kube-controller-manager-master   1/1     Running   0          3m
kube-system   kube-proxy-7tnw2                 1/1     Running   0          2m47s
kube-system   kube-proxy-lmh4z                 1/1     Running   0          74s
kube-system   kube-scheduler-master            1/1     Running   0          3m1s
```
Note: at this point both nodes (one master, one node) show NotReady, and the coredns pods are Pending. This is expected: no network plugin has been installed yet, so the next step is to install one. Here I chose Calico.
Install the network plugin (master node)

Install whichever network plugin you prefer; here I chose Calico.

```bash
curl https://raw.githubusercontent.com/projectcalico/calico/v3.25.0/manifests/calico.yaml -O
```

- Install

```bash
kubectl apply -f calico.yaml
```
Problem: at this point Kubernetes fails to pull the Calico images, with errors like the following:
```
Dec 02 21:58:49 node2 containerd[7285]: time="2022-12-02T21:58:49.627179374+08:00" level=error msg="Failed to handle backOff event &ImageCreate{Name:docker.io/calico/cni:v3.24.5,Labels:map[string]string{io.cri-containerd.image: managed,},XXX_unrecognized:[],} for docker.io/calico/cni:v3.24.5" error="update image store for \"docker.io/calico/cni:v3.24.5\": get image info from containerd: get image diffIDs: unexpected media type text/html for sha256:0238205317023105e6589e4001e2a3d81b84c71740b0d9563f6157ddb32c4ea4: not found"
Dec 02 21:58:50 node2 containerd[7285]: time="2022-12-02T21:58:50.627858264+08:00" level=info msg="ImageCreate event &ImageCreate{Name:docker.io/calico/kube-controllers:v3.24.5,Labels:map[string]string{io.cri-containerd.image: managed,},XXX_unrecognized:[],}"
Dec 02 21:58:50 node2 containerd[7285]: time="2022-12-02T21:58:50.627954441+08:00" level=error msg="Failed to handle backOff event &ImageCreate{Name:docker.io/calico/kube-controllers:v3.24.5,Labels:map[string]string{io.cri-containerd.image: managed,},XXX_unrecognized:[],} for docker.io/calico/kube-controllers:v3.24.5" error="update image store for \"docker.io/calico/kube-controllers:v3.24.5\": get image info from containerd: get image diffIDs: unexpected media type text/html for sha256:0238205317023105e6589e4001e2a3d81b84c71740b0d9563f6157ddb32c4ea4: not found"
Dec 02 21:59:01 node2 containerd[7285]: time="2022-12-02T21:59:01.446288924+08:00" level=info msg="Events for \"docker.io/calico/cni:v3.24.5\" is in backoff, enqueue event &ImageDelete{Name:docker.io/calico/cni:v3.24.5,XXX_unrecognized:[],}"
Dec 02 21:59:10 node2 containerd[7285]: time="2022-12-02T21:59:10.628919112+08:00" level=info msg="ImageCreate event &ImageCreate{Name:docker.io/calico/node:v3.24.5,Labels:map[string]string{io.cri-containerd.image: managed,},XXX_unrecognized:[],}"
Dec 02 21:59:10 node2 containerd[7285]: time="2022-12-02T21:59:10.629086106+08:00" level=error msg="Failed to handle backOff event &ImageCreate{Name:docker.io/calico/node:v3.24.5,Labels:map[string]string{io.cri-containerd.image: managed,},XXX_unrecognized:[],} for docker.io/calico/node:v3.24.5" error="update image store for \"docker.io/calico/node:v3.24.5\": get image info from containerd: get image diffIDs: unexpected media type text/html for sha256:0238205317023105e6589e4001e2a3d81b84c71740b0d9563f6157ddb32c4ea4: not found"
Dec 02 21:59:39 node2 containerd[7285]: time="2022-12-02T21:59:39.956573672+08:00" level=error msg="(*service).Write failed" error="rpc error: code = Canceled desc = context canceled" expected="sha256:0797c5cde8c8c54e2b4c3035fc178e8e062d3636855b72b4cfbb37e7a4fb133d" ref="layer-sha256:0797c5cde8c8c54e2b4c3035fc178e8e062d3636855b72b4cfbb37e7a4fb133d" total=86081905
```
Workaround reference: https://blog.csdn.net/MssGuo/article/details/128149704

Remove the containerd registry mirror configuration:

```bash
vim /etc/containerd/certs.d/docker.io/hosts.toml
```

```
server = "https://docker.io"

[host."https://xxxmyelo.mirror.aliyuncs.com"]   # this is your own Aliyun mirror accelerator address
  capabilities = ["pull", "resolve"]
```

```bash
root@node2:~# systemctl stop containerd.service && systemctl start containerd.service
```

That still did not work. Following other users' advice, I removed the containerd mirror configured in the earlier step by setting config_path in /etc/containerd/config.toml back to an empty string:

```
[plugins."io.containerd.grpc.v1.cri".registry]
  config_path = ""   # revert to an empty path
```
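If you prefer not to edit config.toml by hand, the same revert can be scripted (a sketch; it assumes config_path was set by the sed used in the mirror step above):

```bash
sed -i 's#config_path = "/etc/containerd/certs.d"#config_path = ""#' /etc/containerd/config.toml
```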
Restart containerd:

```bash
systemctl restart containerd
```

After a while, check the pod status:

```bash
watch kubectl get pods -A
```
All of the pods are now running successfully:
```
NAMESPACE     NAME                                      READY   STATUS    RESTARTS   AGE
kube-system   calico-kube-controllers-57b57c56f-mc5bc   1/1     Running   0          3m45s
kube-system   calico-node-jrggj                         1/1     Running   0          3m45s
kube-system   calico-node-wq6q2                         1/1     Running   0          3m45s
kube-system   coredns-5bbd96d687-7d6dl                  1/1     Running   0          8m30s
kube-system   coredns-5bbd96d687-frz87                  1/1     Running   0          8m30s
kube-system   etcd-master                               1/1     Running   0          8m43s
kube-system   kube-apiserver-master                     1/1     Running   0          8m43s
kube-system   kube-controller-manager-master            1/1     Running   0          8m43s
kube-system   kube-proxy-7tnw2                          1/1     Running   0          8m30s
kube-system   kube-proxy-lmh4z                          1/1     Running   0          6m57s
kube-system   kube-scheduler-master                     1/1     Running   0          8m44s
```
The coredns and calico pods are all in the Running state.

Check the node status:

```
[root@master ~]# kubectl get nodes
NAME     STATUS   ROLES           AGE   VERSION
master   Ready    control-plane   12m   v1.26.2
node1    Ready    <none>          10m   v1.26.2
```

Both nodes are now Ready.

At this point, Kubernetes v1.26.2 has been installed successfully.
Test the cluster

```
[root@master ~]# kubectl create deployment httpd --image=httpd
[root@master ~]# kubectl expose deployment httpd --port=80 --type=NodePort
[root@master ~]# kubectl get pod,svc
NAME                        READY   STATUS    RESTARTS   AGE
pod/httpd-975f8444c-t6z2c   1/1     Running   0          75s

NAME                 TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)        AGE
service/httpd        NodePort    10.106.224.79   <none>        80:31409/TCP   63s
service/kubernetes   ClusterIP   10.96.0.1       <none>        443/TCP        14m
```

Visit http://192.168.84.128:31409 in a browser. It shows:

```
It works!
```

Success.
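The same check can be run from the command line on any node (the NodePort 31409 is whatever `kubectl get svc` reported above; substitute yours):

```bash
curl http://192.168.84.128:31409
# <html><body><h1>It works!</h1></body></html>
```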