
I. Environment

All three Kubernetes nodes serve as master (control-plane) nodes. High availability is provided by keepalived + haproxy; if the servers run in the cloud, SLB + haproxy is recommended instead.

Server list:

Hostname  OS               Spec  IP          Software
k8s1      CentOS 7.6.1810  2C4G  10.0.0.101  Kubernetes
k8s2      CentOS 7.6.1810  2C4G  10.0.0.102  Kubernetes
k8s3      CentOS 7.6.1810  2C4G  10.0.0.103  Kubernetes
nginx1    CentOS 7.6.1810  1C1G  10.0.0.104  keepalived, haproxy, openresty
nginx2    CentOS 7.6.1810  1C1G  10.0.0.105  keepalived, haproxy, openresty

Software versions:

Software     Version
Kubernetes 1.22.2
haproxy 2.2.22
keepalived 2.0.20
openresty 1.19.3.1

II. Environment Initialization

1. Initialize all nodes

#!/bin/bash
set -e

# Disable firewalld and SELinux (not needed on Alibaba Cloud; comment out there)
systemctl stop firewalld && systemctl disable firewalld
sed -i '/^SELINUX=/c SELINUX=disabled' /etc/selinux/config
setenforce 0

# Switch to Aliyun mirrors (not needed on Alibaba Cloud; comment out there)
mkdir /etc/yum.repos.d/centos
mv /etc/yum.repos.d/*.repo /etc/yum.repos.d/centos/
curl -o /etc/yum.repos.d/CentOS-7.repo http://mirrors.aliyun.com/repo/Centos-7.repo
curl -o /etc/yum.repos.d/epel7.repo http://mirrors.aliyun.com/repo/epel-7.repo
yum install -y gcc gcc-c++ vim net-tools telnet dos2unix iptraf ntp tree sysstat lrzsz bash-completion chrony wget unzip

# SSH optimization: allow root login, skip reverse DNS lookups (not needed on Alibaba Cloud; comment out there)
sed -i 's/#PermitRootLogin yes/PermitRootLogin yes/g' /etc/ssh/sshd_config
sed -i 's/#UseDNS yes/UseDNS no/g' /etc/ssh/sshd_config
sed -i 's/GSSAPIAuthentication yes/GSSAPIAuthentication no/g' /etc/ssh/sshd_config
systemctl restart sshd

# Time synchronization (not needed on Alibaba Cloud; comment out there)
timedatectl set-timezone Asia/Shanghai
sed -i 's/^server/#&/' /etc/chrony.conf
#echo "server cn.pool.ntp.org iburst" >> /etc/chrony.conf
cat >> /etc/chrony.conf << EOF
server ntp1.aliyun.com minpoll 4 maxpoll 10 iburst
server ntp2.aliyun.com minpoll 4 maxpoll 10 iburst
server ntp3.aliyun.com minpoll 4 maxpoll 10 iburst
server ntp4.aliyun.com minpoll 4 maxpoll 10 iburst
server ntp5.aliyun.com minpoll 4 maxpoll 10 iburst
server ntp6.aliyun.com minpoll 4 maxpoll 10 iburst
EOF
systemctl restart chronyd.service
systemctl enable chronyd.service

# File descriptor and process limits
cat >> /etc/security/limits.conf << EOF
root            soft    nofile          655350
root            hard    nofile          655350
*               soft    nofile          655350
*               hard    nofile          655350
EOF
# Change the nproc limit to: *    soft    nproc    unlimited
sed -i '/^\*/s/4096/unlimited/' /etc/security/limits.d/20-nproc.conf

# Kernel parameter tuning
cat >> /etc/sysctl.conf << EOF
net.ipv4.tcp_tw_reuse = 1
net.ipv4.ip_local_port_range = 10000 65000
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_tw_buckets = 36000
net.ipv4.tcp_max_syn_backlog = 16384
net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_fin_timeout = 30
vm.swappiness=0
vm.max_map_count = 262144
kernel.pid_max = 65535
net.core.somaxconn=33268
EOF
sysctl -p

# Disable swap
swapoff -a
sed -i 's/^.*centos-swap/#&/g' /etc/fstab

# Raise default file and process limits for systemd services
echo "DefaultLimitNOFILE=65535" >> /etc/systemd/system.conf 
echo "DefaultLimitNPROC=65535" >> /etc/systemd/system.conf
systemctl daemon-reexec

# Host name mappings
cat << EOF >> /etc/hosts
10.0.0.101 k8s1
10.0.0.102 k8s2
10.0.0.103 k8s3
10.0.0.104 nginx1
10.0.0.105 nginx2
EOF
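
Optionally, a quick sanity check after the script has run (a minimal sketch; run on any node):

getenforce                   # Permissive now, Disabled after a reboot
ulimit -n                    # 655350 in a new login shell
chronyc sources -v           # NTP servers reachable and syncing
sysctl net.core.somaxconn vm.swappiness
free -h                      # the Swap line should show 0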

2. Initialize the k8s nodes

#!/bin/bash
set -e

# Load the br_netfilter module
modprobe br_netfilter
cat << EOF > /etc/modules-load.d/k8s.conf
br_netfilter
EOF

# Let iptables see bridged traffic
cat << EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF
sysctl --system

# Install Docker
yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum install -y docker-ce
systemctl enable docker && systemctl start docker 
cat << EOF > /etc/docker/daemon.json
{
  "data-root": "/data/docker",
  "exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
systemctl enable docker && systemctl restart docker 

# Configure the Kubernetes yum repo
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=http://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=http://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg http://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF

# Install kubeadm, kubectl and kubelet
yum install -y --nogpgcheck kubeadm-1.22.2 kubectl-1.22.2 kubelet-1.22.2
systemctl enable kubelet
kubeadm completion bash > /etc/bash_completion.d/kubeadm
kubectl completion bash >/etc/bash_completion.d/kubectl
source /etc/bash_completion.d/kubeadm
source /etc/bash_completion.d/kubectl
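
Optionally, confirm the container runtime and tooling before moving on (a minimal sketch):

docker info | grep -i "cgroup driver"    # expect: systemd
docker info | grep -i "docker root dir"  # expect: /data/docker
kubeadm version -o short                 # expect: v1.22.2
kubelet --version
sysctl net.bridge.bridge-nf-call-iptables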

III. Deploying keepalived and haproxy

Reference: https://github.com/kubernetes/kubeadm/blob/main/docs/ha-considerations.md

1. haproxy

Build and install from source:

wget https://www.haproxy.org/download/2.2/src/haproxy-2.2.22.tar.gz
yum install -y systemd-devel

tar -xf haproxy-2.2.22.tar.gz -C /usr/local/
cd /usr/local/haproxy-2.2.22/ 
make TARGET=linux-glibc USE_SYSTEMD=1
make install

Register it with systemd:

cat  > /usr/lib/systemd/system/haproxy.service << EOF
[Unit]
Description=HAProxy Load Balancer
After=syslog.target network.target

[Service]
ExecStartPre=/usr/local/sbin/haproxy -f /etc/haproxy/haproxy.cfg   -c -q
ExecStart=/usr/local/sbin/haproxy -Ws -f /etc/haproxy/haproxy.cfg  -p /run/haproxy.pid
ExecReload=/bin/kill -USR2 $MAINPID

[Install]
WantedBy=multi-user.target
EOF

Prepare the configuration file:

mkdir /etc/haproxy/
cat > /etc/haproxy/haproxy.cfg << EOF
#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
    # log /dev/log local0
    # log /dev/log local1 notice
    log 127.0.0.1 local2 info
    daemon

#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
    mode                    http
    log                     global
    option                  httplog
    option                  dontlognull
    option http-server-close
    option forwardfor       except 127.0.0.0/8
    option                  redispatch
    retries                 1
    timeout http-request    10s
    timeout queue           20s
    timeout connect         5s
    timeout client          20s
    timeout server          20s
    timeout http-keep-alive 10s
    timeout check           10s

#---------------------------------------------------------------------
# apiserver frontend which proxies to the control plane nodes
#---------------------------------------------------------------------
frontend apiserver
    bind *:6443
    mode tcp
    option tcplog
    default_backend apiserver

#---------------------------------------------------------------------
# round robin balancing for apiserver
#---------------------------------------------------------------------
backend apiserver
    option httpchk GET /healthz
    http-check expect status 200
    mode tcp
    option ssl-hello-chk
    balance     roundrobin
        server k8s1 10.0.0.101:6443 check
        server k8s2 10.0.0.102:6443 check
        server k8s3 10.0.0.103:6443 check

listen admin_stats      # stats page
    mode http           # HTTP mode
    bind *:1080         # listen port
    stats enable        # enable the stats page
    stats hide-version  # hide the HAProxy version
    maxconn 1024        # max connections
    stats refresh 30s   # refresh the page every 30s
    stats uri /stats    # URI path
    #stats admin if TRUE # enable admin actions
    stats auth admin:000000  # username:password
EOF

systemctl enable haproxy
systemctl start haproxy
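
A quick check that haproxy is up (a minimal sketch; the backends stay DOWN until the apiservers are deployed, which is expected at this point):

/usr/local/sbin/haproxy -c -f /etc/haproxy/haproxy.cfg
systemctl status haproxy --no-pager
ss -lntp | grep -E ':6443|:1080'
# Stats page, also reachable from a browser
curl -u admin:000000 http://127.0.0.1:1080/stats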

2. keepalived

Build and install from source:

wget --no-check-certificate https://www.keepalived.org/software/keepalived-2.0.20.tar.gz 
yum install -y openssl-devel libnl libnl-devel

tar -xf keepalived-2.0.20.tar.gz -C /usr/local/
cd /usr/local/keepalived-2.0.20
./configure --prefix=/usr/local/keepalived/
make USE_SYSTEMD=1
make install

Prepare the configuration files:

ln -s /usr/local/keepalived/etc/keepalived /etc/keepalived
cp /etc/keepalived/keepalived.con{f,f.bak}

Configuration file on nginx1:

cat > /etc/keepalived/keepalived.conf << EOF
! /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
    router_id nginx1
}
vrrp_script check_apiserver {
  script "/etc/keepalived/check_apiserver.sh"
  interval 3
  weight -2
  fall 10
  rise 2
}

vrrp_instance VI_1 {
    state MASTER
    interface ens33
    virtual_router_id 1
    priority 101
    authentication {
        auth_type PASS
        auth_pass 0000
    }
    virtual_ipaddress {
        10.0.0.100
    }
    track_script {
        check_apiserver
    }
}
EOF
systemctl enable keepalived
systemctl start keepalived

Configuration file on nginx2:

cat > /etc/keepalived/keepalived.conf << EOF
! /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
    router_id nginx2
}
vrrp_script check_apiserver {
  script "/etc/keepalived/check_apiserver.sh"
  interval 3
  weight -2
  fall 10
  rise 2
}

vrrp_instance VI_1 {
    state BACKUP
    interface ens33
    virtual_router_id 1
    priority 100
    authentication {
        auth_type PASS
        auth_pass 0000
    }
    virtual_ipaddress {
        10.0.0.100
    }
    track_script {
        check_apiserver
    }
}
EOF
systemctl enable keepalived
systemctl start keepalived

Prepare the health-check script:

cat > /etc/keepalived/check_apiserver.sh << 'EOF'
#!/bin/sh

errorExit() {
    echo "*** $*" 1>&2
    exit 1
}

curl --silent --max-time 2 --insecure https://localhost:6443/ -o /dev/null || errorExit "Error GET https://localhost:6443/"
if ip addr | grep -q 10.0.0.100; then
    curl --silent --max-time 2 --insecure https://10.0.0.100:6443/ -o /dev/null || errorExit "Error GET https://10.0.0.100:6443/"
fi
EOF
chmod +x /etc/keepalived/check_apiserver.sh
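
With keepalived running on both machines, the VIP should land on nginx1 (a minimal check sketch; the check script keeps failing until the apiservers exist, which is expected at this stage):

# On nginx1 (MASTER) the VIP should be present; on nginx2 it should not
ip addr show ens33 | grep 10.0.0.100
# Simulate failover: stop keepalived on nginx1 and the VIP should move to nginx2
systemctl stop keepalived                # run on nginx1
ip addr show ens33 | grep 10.0.0.100     # run on nginx2
systemctl start keepalived               # restore nginx1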

IV. Deploying the Kubernetes Cluster

The kubeadm binary used here was recompiled so that the generated certificates are valid for 10 years.

kubeadm init --control-plane-endpoint 10.0.0.100:6443 --upload-certs --image-repository=registry.aliyuncs.com/google_containers --kubernetes-version=v1.22.2 --service-cidr=10.96.0.0/16 --pod-network-cidr=10.244.0.0/16 --ignore-preflight-errors=Swap
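
The same options can also be expressed as a kubeadm configuration file (a minimal sketch; kubeadm-config.yaml is a hypothetical filename and the field values simply mirror the flags above):

apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
kubernetesVersion: v1.22.2
controlPlaneEndpoint: "10.0.0.100:6443"
imageRepository: registry.aliyuncs.com/google_containers
networking:
  serviceSubnet: 10.96.0.0/16
  podSubnet: 10.244.0.0/16

kubeadm init --config kubeadm-config.yaml --upload-certs --ignore-preflight-errors=Swap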

A successful init prints the following:

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

  export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

You can now join any number of the control-plane node running the following command on each as root:

  kubeadm join 10.0.0.100:6443 --token m8d5kv.v90803q5327gjqgz \
    --discovery-token-ca-cert-hash sha256:7d1e394443e4aea3b27919d90d25c63bf79edaba003bce454fb349f1c6b09b78 \
    --control-plane --certificate-key ca3c8e52b99fc9ea32964b4ed52a6e74603b5b7c4acfa99016196979f5c20773

Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 10.0.0.100:6443 --token m8d5kv.v90803q5327gjqgz \
    --discovery-token-ca-cert-hash sha256:7d1e394443e4aea3b27919d90d25c63bf79edaba003bce454fb349f1c6b09b78 

Follow the printed instructions. Since all three nodes are masters, run the control-plane join command (the one with --control-plane) on the other two servers.

[root@k8s1 ~]# kubectl get node
NAME   STATUS     ROLES                  AGE     VERSION
k8s1   NotReady   control-plane,master   24m     v1.22.2
k8s2   NotReady   control-plane,master   2m30s   v1.22.2
k8s3   NotReady   control-plane,master   2m27s   v1.22.2

# Remove the taint so that Pods can be scheduled on the master nodes
kubectl taint nodes --all node-role.kubernetes.io/master-

V. Network Component: Calico

Reference: https://projectcalico.docs.tigera.io/archive/v3.18/getting-started/kubernetes/self-managed-onprem/onpremises

1. Install Calico

Download the installation manifest; we will customize it below.

curl https://docs.projectcalico.org/archive/v3.18/manifests/calico-etcd.yaml -o calico.yaml

Change the etcd cluster endpoints. Since etcd has TLS enabled, we also need to point Calico at the etcd certificates.

kind: ConfigMap
apiVersion: v1
metadata:
  name: calico-config
  namespace: kube-system
data:
  # Configure this with the location of your etcd cluster.
  etcd_endpoints: "https://10.0.0.101:2379,https://10.0.0.102:2379,https://10.0.0.103:2379"
  # If you're using TLS enabled etcd uncomment the following.
  # You must also populate the Secret below with these files.
  etcd_ca: "/calico-secrets/etcd-ca"      # "/calico-secrets/etcd-ca"
  etcd_cert: "/calico-secrets/etcd-cert"  # "/calico-secrets/etcd-cert"
  etcd_key: "/calico-secrets/etcd-key"    # "/calico-secrets/etcd-key"

First base64-encode the certificates, then in the Secret calico-etcd-secrets uncomment etcd-ca, etcd-key and etcd-cert and fill in the encoded values obtained below.

# Get the base64-encoded certificates
cat /etc/kubernetes/pki/etcd/ca.crt | base64 -w 0               # calico-etcd-secrets etcd-ca
cat /etc/kubernetes/pki/apiserver-etcd-client.crt | base64 -w 0 # calico-etcd-secrets etcd-cert
cat /etc/kubernetes/pki/apiserver-etcd-client.key | base64 -w 0 # calico-etcd-secrets etcd-key

# Fill in the certificates
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  name: calico-etcd-secrets
  namespace: kube-system
data:
  # Populate the following files with etcd TLS configuration if desired, but leave blank if
  # not using TLS for etcd.
  # This self-hosted install expects three files with the following names.  The values
  # should be base64 encoded strings of the entire contents of each file.
  etcd-key: xxx
  etcd-cert: xxx
  etcd-ca: xxx

Change the secret file mode in the Deployment calico-kube-controllers:

      volumes:
        # Mount in the etcd TLS secrets with mode 400.
        # See https://kubernetes.io/docs/concepts/configuration/secret/
        - name: etcd-certs
          secret:
            secretName: calico-etcd-secrets
            defaultMode: 0440

Change the IP autodetection method; the CIDR is the subnet the hosts sit in. Add the following environment variable to the calico-node DaemonSet.

            - name: IP_AUTODETECTION_METHOD
              value: "cidr=10.0.0.0/24"

With the edits done, apply the manifest:

kubectl apply -f calico.yaml
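
Once the Calico Pods come up, the nodes should switch to Ready (a minimal check sketch; the label selectors are the ones used in the manifest):

kubectl get pods -n kube-system -l k8s-app=calico-node -o wide
kubectl get pods -n kube-system -l k8s-app=calico-kube-controllers
kubectl get nodes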

2. Configure dual route reflectors (RR)

Install calicoctl:

# Install calicoctl
curl -O -L  https://github.com/projectcalico/calicoctl/releases/download/v3.18.6/calicoctl
chmod a+x calicoctl
mv calicoctl /usr/local/bin/

# Prepare the calicoctl configuration file
mkdir /etc/calico
cat > /etc/calico/calicoctl.cfg << EOF
apiVersion: projectcalico.org/v3
kind: CalicoAPIConfig
metadata:
spec:
  etcdEndpoints: "https://10.0.0.101:2379,https://10.0.0.102:2379,https://10.0.0.103:2379"
  etcdKeyFile: /etc/kubernetes/pki/etcd/server.key
  etcdCertFile: /etc/kubernetes/pki/etcd/server.crt
  etcdCACertFile: /etc/kubernetes/pki/etcd/ca.crt
EOF

[root@k8s1 ~]# calicoctl get node
NAME   
k8s1   
k8s2   
k8s3 

Configure BGP and disable the full-mesh (node-to-node mesh) mode:

[root@k8s1 calico]# vim bgpconfig_default.yaml 
apiVersion: projectcalico.org/v3
kind: BGPConfiguration
metadata:
  name: default
spec:
  logSeverityScreen: Info
  nodeToNodeMeshEnabled: false

[root@k8s1 calico]# calicoctl create -f bgpconfig_default.yaml
Successfully created 1 'BGPConfiguration' resource(s)

[root@k8s1 calico]# calicoctl get bgpconfig
NAME      LOGSEVERITY   MESHENABLED   ASNUMBER   
default   Info          false         - 

Define how ordinary BGP nodes and the route reflectors (RR) peer, using labels to distinguish node roles:

[root@k8s1 calico]# vim bgppeer.yaml 
kind: BGPPeer
apiVersion: projectcalico.org/v3
metadata:
  name: peer-to-rrs
spec:
  # Rule 1: ordinary BGP nodes peer with the RRs
  nodeSelector: "!has(route-reflector)"
  peerSelector: has(route-reflector)
---
kind: BGPPeer
apiVersion: projectcalico.org/v3
metadata:
  name: rr-mesh
spec:
  # Rule 2: RRs peer with each other
  nodeSelector: has(route-reflector)
  peerSelector: has(route-reflector)

[root@k8s1 calico]# calicoctl create -f bgppeer.yaml
Successfully created 2 'BGPPeer' resource(s)
[root@k8s1 calico]# calicoctl get bgpPeer
NAME          PEERIP   NODE                    ASN   
peer-to-rrs            !has(route-reflector)   0     
rr-mesh                has(route-reflector)    0 

Add the RRs:

# Export the node configuration
[root@k8s1 calico]# calicoctl get node k8s2 --export -o yaml > rr-k8s2.yaml
[root@k8s1 calico]# calicoctl get node k8s3 --export -o yaml > rr-k8s3.yaml

# Edit the exported files and add the following under the corresponding blocks
metadata:
  labels:
    # Label the node as a route reflector
    route-reflector: "true"

spec:
  bgp:
    # Cluster ID; keep it identical on k8s2 and k8s3 for redundancy and loop prevention
    routeReflectorClusterID: 10.255.11.11

[root@k8s1 calico]# calicoctl apply -f rr-k8s2.yaml 
Successfully applied 1 'Node' resource(s)
[root@k8s1 calico]# calicoctl apply -f rr-k8s3.yaml 
Successfully applied 1 'Node' resource(s)
[root@k8s1 calico]# calicoctl node status
Calico process is running.

IPv4 BGP status
+--------------+---------------+-------+----------+-------------+
| PEER ADDRESS |   PEER TYPE   | STATE |  SINCE   |    INFO     |
+--------------+---------------+-------+----------+-------------+
| 10.0.0.102   | node specific | up    | 06:55:25 | Established |
| 10.0.0.103   | node specific | up    | 06:55:28 | Established |
+--------------+---------------+-------+----------+-------------+

IPv6 BGP status
No IPv6 peers found.

Run calicoctl node status on each node to verify: the two RRs peer with each other, and every other node peers with RR1 and RR2 but not directly with the other ordinary nodes.

VI. Direct Pod Access from Servers Outside the Cluster

Add an IPIP tunnel interface on both nginx machines. Note: because the one-to-many IPIP tunnel is controlled by routing rules, the interface name must be tunl0.
Configure 10.222.1.2 on nginx1 and 10.222.1.3 on nginx2.

# Create the tunl0 interface
ip tunnel add tunl0 mode ipip
ip link set tunl0 up

# Assign the IPIP tunnel IP; it must not be a cluster IP and must not conflict with other private addresses
ip addr add 10.222.1.2/32 dev tunl0

Add routes for the Kubernetes Pod CIDR blocks, using the owning k8s node's IP as the gateway. Get each block and its node with calicoctl ipam check.

# Run on a k8s node
[root@k8s1 calico]# calicoctl ipam check
Checking IPAM for inconsistencies...

Loading all IPAM blocks...
Found 3 IPAM blocks.
 IPAM block 10.244.109.64/26 affinity=host:k8s2:
 IPAM block 10.244.166.192/26 affinity=host:k8s1:
 IPAM block 10.244.219.0/26 affinity=host:k8s3:
IPAM blocks record 5 allocations.

# Run on the nginx nodes
[root@nginx1 ~]# ip route add 10.244.109.64/26 via 10.0.0.102 dev tunl0 onlink
[root@nginx1 ~]# ip route add 10.244.166.192/26 via 10.0.0.101 dev tunl0 onlink
[root@nginx1 ~]# ip route add 10.244.219.0/26 via 10.0.0.103 dev tunl0 onlink

Add the return routes on all k8s nodes:

ip route add 10.222.1.2/32 via 10.0.0.104 dev tunl0 onlink
ip route add 10.222.1.3/32 via 10.0.0.105 dev tunl0 onlink

Finally, update the Calico Felix configuration to add the external nodes list:

[root@k8s1 calico]# cat FelixConfiguration_default.yaml 
apiVersion: projectcalico.org/v3
kind: FelixConfiguration
metadata:
  name: default
spec:
  bpfLogLevel: ""
  ipipEnabled: true
  logSeverityScreen: Info
  reportingInterval: 0s
  externalNodesList: ["10.0.0.104", "10.0.0.105"]

[root@k8s1 calico]# calicoctl apply -f FelixConfiguration_default.yaml 
Successfully applied 1 'FelixConfiguration' resource(s)
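
Alternatively, the same change can be applied in place with calicoctl patch instead of maintaining a separate file (a sketch; it assumes the default FelixConfiguration resource already exists, which it does once calico-node is running):

calicoctl patch felixconfiguration default --patch '{"spec": {"externalNodesList": ["10.0.0.104", "10.0.0.105"]}}'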

All commands in one place:

# Run on nginx1
ip tunnel add tunl0 mode ipip
ip link set tunl0 up
ip addr add 10.222.1.2/32 dev tunl0
ip route add 10.244.109.64/26 via 10.0.0.102 dev tunl0 onlink
ip route add 10.244.166.192/26 via 10.0.0.101 dev tunl0 onlink
ip route add 10.244.219.0/26 via 10.0.0.103 dev tunl0 onlink

# Run on nginx2
ip tunnel add tunl0 mode ipip
ip link set tunl0 up
ip addr add 10.222.1.3/32 dev tunl0
ip route add 10.244.109.64/26 via 10.0.0.102 dev tunl0 onlink
ip route add 10.244.166.192/26 via 10.0.0.101 dev tunl0 onlink
ip route add 10.244.219.0/26 via 10.0.0.103 dev tunl0 onlink

# Run on all k8s nodes
ip route add 10.222.1.2/32 via 10.0.0.104 dev tunl0 onlink
ip route add 10.222.1.3/32 via 10.0.0.105 dev tunl0 onlink

Add the routes at boot:

# Run on all nginx nodes (mind the tunl0 IP: 10.222.1.2 on nginx1, 10.222.1.3 on nginx2)
cat >> /etc/rc.local << EOF
ip link set tunl0 up
ip addr add 10.222.1.2/32 dev tunl0
ip route add 10.244.109.64/26 via 10.0.0.102 dev tunl0 onlink
ip route add 10.244.166.192/26 via 10.0.0.101 dev tunl0 onlink
ip route add 10.244.219.0/26 via 10.0.0.103 dev tunl0 onlink
EOF
chmod a+x /etc/rc.local

# Run on all k8s nodes
cat >> /etc/rc.local << EOF
ip link set tunl0 up
ip route add 10.222.1.2/32 via 10.0.0.104 dev tunl0 onlink
ip route add 10.222.1.3/32 via 10.0.0.105 dev tunl0 onlink
EOF
chmod a+x /etc/rc.local

Verification:

[root@k8s1 ~]# kubectl run test --image=nginx
pod/test created
[root@k8s1 ~]# kubectl get pod -o wide
NAME   READY   STATUS    RESTARTS   AGE    IP             NODE   NOMINATED NODE   READINESS GATES
test   1/1     Running   0          115s   10.244.219.1   k8s3   <none>           <none>

[root@nginx1 ~]# ping -c1 10.244.219.1
PING 10.244.219.1 (10.244.219.1) 56(84) bytes of data.
64 bytes from 10.244.219.1: icmp_seq=1 ttl=63 time=0.312 ms

--- 10.244.219.1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.312/0.312/0.312/0.000 ms
[root@nginx1 ~]# curl 10.244.219.1
<!DOCTYPE html>
<html>
<head>
<title>Welcome to nginx!</title>
......

VII. Deploying Other Components

1. ingress-nginx

Official docs: https://kubernetes.github.io/ingress-nginx/deploy/
Download the installation manifest:

wget https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.2.0/deploy/static/provider/baremetal/deploy.yaml

First label the nodes that ingress-nginx-controller should run on:

kubectl label nodes k8s1 ingress-nginx-node=true
kubectl label nodes k8s2 ingress-nginx-node=true

Edit the Deployment ingress-nginx-controller: set replicas to 2, enable hostNetwork, and add the nodeSelector shown below.

  replicas: 2
......
      hostNetwork: true
      nodeSelector:
        ingress-nginx-node: "true" 

Deploy:

[root@k8s1 ingress-nginx]# kubectl apply -f deploy.yaml
[root@k8s1 ingress-nginx]# kubectl get pod -n ingress-nginx -o wide
NAME                                       READY   STATUS      RESTARTS   AGE     IP             NODE   NOMINATED NODE   READINESS GATES
ingress-nginx-admission-create--1-xfz7k    0/1     Completed   0          38m     10.244.219.8   k8s3   <none>           <none>
ingress-nginx-controller-577876995-8wgzc   1/1     Running     0          4m27s   10.0.0.101     k8s1   <none>           <none>
ingress-nginx-controller-577876995-wggm4   1/1     Running     0          4m28s   10.0.0.102     k8s2   <none>           <none>
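
A quick smoke test (a sketch; the web Deployment, Service, Ingress and host name below are hypothetical and only used for verification):

kubectl create deployment web --image=nginx
kubectl expose deployment web --port=80

cat << EOF | kubectl apply -f -
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: web
spec:
  ingressClassName: nginx
  rules:
  - host: web.example.com
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: web
            port:
              number: 80
EOF

# The controller uses hostNetwork on k8s1/k8s2, so test against a node IP
curl -H "Host: web.example.com" http://10.0.0.101/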

Install the ingress-nginx kubectl plugin (this may require a proxy to reach GitHub). I installed krew and the plugin on a machine with access, packed up the /root/.krew/ directory, and extracted it on the target machines.

[root@k8s1 ~]# tar -xf krew.tar.gz
[root@k8s1 ~]# export PATH="${KREW_ROOT:-$HOME/.krew}/bin:$PATH"
[root@k8s1 ~]# kubectl krew version
OPTION            VALUE
GitTag            v0.4.3
GitCommit         dbfefa5
IndexURI          https://github.com/kubernetes-sigs/krew-index.git
BasePath          /root/.krew
IndexPath         /root/.krew/index/default
InstallPath       /root/.krew/store
BinPath           /root/.krew/bin
DetectedPlatform  linux/amd64

[root@k8s1 ~]# kubectl krew list
PLUGIN         VERSION
images         v0.3.7
ingress-nginx  v0.31.0
krew           v0.4.3
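
With the plugin in place, a few useful subcommands (a sketch; see kubectl ingress-nginx --help for the full list):

# Check Ingress objects for known problems
kubectl ingress-nginx lint --all-namespaces
# List the backends the controller currently knows about
kubectl ingress-nginx backends -n ingress-nginx
# Dump the generated nginx.conf from a controller pod
kubectl ingress-nginx conf -n ingress-nginx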

2. dashboard

Official docs: https://github.com/kubernetes/dashboard

[root@k8s1 dashboard]# wget https://raw.githubusercontent.com/kubernetes/dashboard/v2.5.1/aio/deploy/recommended.yaml
[root@k8s1 dashboard]# kubectl apply -f recommended.yaml 

[root@k8s1 dashboard]# kubectl get pod -n kubernetes-dashboard
NAME                                        READY   STATUS    RESTARTS   AGE
dashboard-metrics-scraper-c45b7869d-spc8w   1/1     Running   0          3m45s
kubernetes-dashboard-79b5779bf4-m8kpp       1/1     Running   0          3m45s

Create a service account, bind it to the cluster-admin role, and fetch its token.

[root@k8s1 dashboard]# vim dashboard-user.yaml 
apiVersion: v1
kind: ServiceAccount 
metadata:
  name: admin-user 
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: admin-user
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
- kind: ServiceAccount
  name: admin-user
  namespace: kube-system

[root@k8s1 dashboard]# kubectl apply -f dashboard-user.yaml 

[root@k8s1 dashboard]# kubectl -n kube-system get secret $(kubectl -n kube-system get sa/admin-user -o jsonpath="{.secrets[0].name}") -o go-template="{{.data.token | base64decode}}" > dashboard-token.txt
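
The dashboard Service is ClusterIP by default; one simple way to reach it is kubectl proxy (a sketch), then sign in with the token saved above:

kubectl proxy
# then open in a browser:
# http://localhost:8001/api/v1/namespaces/kubernetes-dashboard/services/https:kubernetes-dashboard:/proxy/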

3. Metrics Server

Official docs: https://github.com/kubernetes-sigs/metrics-server

[root@k8s1 ~]# wget https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/high-availability.yaml
[root@k8s1 ~]# vim high-availability.yaml   # add the flag below
......
    spec:
      containers:
      - args:
        - --kubelet-insecure-tls    # added
......
[root@k8s1 ~]# kubectl apply -f high-availability.yaml
serviceaccount/metrics-server created

[root@k8s1 ~]# kubectl top node
NAME   CPU(cores)   CPU%   MEMORY(bytes)   MEMORY%   
k8s1   156m         7%     1437Mi          39%       
k8s2   205m         10%    1207Mi          44%       
k8s3   187m         9%     1234Mi          45%     

VIII. Tuning

1. kubelet resource reservation

Official docs: https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/
Reserve resources according to your actual workload:

[root@k8s1 ~]# vim /var/lib/kubelet/config.yaml
# Resources reserved for the operating system
systemReserved:
  cpu: "100m"
  memory: "256Mi"
# Resources reserved for Kubernetes system daemons
kubeReserved:
  cpu: "100m"
  memory: "256Mi"
# Hard eviction threshold; Pods are evicted when available memory drops below it
evictionHard:
  memory.available: "128Mi"

[root@k8s1 ~]# systemctl restart kubelet
[root@k8s1 ~]# kubectl describe nodes k8s1 | grep -E -A 5 'Capacity:|Allocatable:'
Capacity:
  cpu:                2
  ephemeral-storage:  17394Mi
  hugepages-1Gi:      0
  hugepages-2Mi:      0
  memory:             3861512Ki
--
Allocatable:
  cpu:                1800m
  ephemeral-storage:  17394Mi
  hugepages-1Gi:      0
  hugepages-2Mi:      0
  memory:             3206152Ki

Allocatable = capacity - systemReserved - kubeReserved - evictionHard
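
A worked check with the numbers above (256Mi = 262144Ki, 128Mi = 131072Ki):

memory: 3861512Ki - 262144Ki (systemReserved) - 262144Ki (kubeReserved) - 131072Ki (evictionHard) = 3206152Ki
cpu:    2000m - 100m (systemReserved) - 100m (kubeReserved) = 1800m

Both match the Allocatable values shown by kubectl describe.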
