From d45637056a804264e83b60e1395a0739001a0104 Mon Sep 17 00:00:00 2001 From: Khue Doan Date: Sun, 2 May 2021 23:07:16 +0700 Subject: [PATCH] Move RKE cluster to separate module, with cleaner Ansible check --- .gitignore | 1 + docs/todo.md | 19 +- .../kubernetes-cluster/ansible/main.yml | 9 + infra/modules/kubernetes-cluster/main.tf | 179 +++++++++++++++++ infra/modules/kubernetes-cluster/terraform.tf | 18 ++ infra/platform.tf | 180 +----------------- infra/terraform.tf | 6 + 7 files changed, 226 insertions(+), 186 deletions(-) create mode 100644 infra/modules/kubernetes-cluster/ansible/main.yml create mode 100644 infra/modules/kubernetes-cluster/main.tf create mode 100644 infra/modules/kubernetes-cluster/terraform.tf diff --git a/.gitignore b/.gitignore index ad37ef1a..a0d083ba 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .venv/ build/ metal/secrets/ +terraform-*-tmp-*/ *.log .terraform* diff --git a/docs/todo.md b/docs/todo.md index a16be8c8..0b3c3f50 100644 --- a/docs/todo.md +++ b/docs/todo.md @@ -1,21 +1,24 @@ # TODO +- [Better Terraform provider inheritance](../infra/terraform.tf#L8) - [(bug) Apply LXD VMs in parallel](../infra/Makefile#L12) - [(bug) Investigate why --noconfirm is not working](../tools/Dockerfile#L3) -- [(bug) Should be posible to put it in the profile instead lxd_profile.kubenode.config, and make it a variable](../infra/cluster.tf#L101) -- [(bug) Use containers instead of virtual machines for Kubernetes nodes https](../infra/cluster.tf#L94) -- [(feature) Automatic ingress and tunnel for all services](../infra/modules/kubernetes-cluster-bootstrap/main.tf#L85) +- [(bug) Should be posible to put it in the profile instead lxd_profile.kubenode.config, and make it a variable](../infra/modules/kubernetes-cluster/main.tf#L101) +- [(bug) Use containers instead of virtual machines for Kubernetes nodes https](../infra/modules/kubernetes-cluster/main.tf#L94) +- [(feature) Automatic ingress and tunnel for all
services](../infra/modules/kubernetes-bootstrap/main.tf#L85) - [(feature) Enable etcd authentication and generate terraform backend config variables](../metal/roles/tfstate/tasks/main.yml#L43) - [(feature) Generate endpoint automatically (terragrunt for variable)](../infra/terraform.tf#L2) - [(feature) Upgrade hosts kernel to use Wireguard in container](../infra/modules/vpn/main.tf#L15) - [(optimize) Change to /var/lib/lxd/server.crt after https](../metal/roles/lxd/tasks/main.yml#L26) - [(optimize) Convert to YAML for Terraform yamldecode](../metal/hosts.ini#L1) -- [(optimize) DRY master and worker definition](../infra/cluster.tf#L89) -- [(optimize) Decide if VPN should be inside Kubernetes](../infra/vpn.tf#L1) -- [(optimize) HA Vault and auto unseal Vault](../infra/modules/kubernetes-cluster-bootstrap/main.tf#L82) -- [(optimize) LXD node SELinux](../metal/roles/lxd/tasks/main.yml#L1) +- [(optimize) Decide if VPN should be inside Kubernetes](../infra/base.tf#L1) +- [(optimize) DRY master and worker definition](../infra/modules/kubernetes-cluster/main.tf#L89) +- [(optimize) HA Vault and auto unseal Vault](../infra/modules/kubernetes-bootstrap/main.tf#L82) - [(optimize) LXD node firewall](../metal/roles/lxd/tasks/main.yml#L6) +- [(optimize) LXD node SELinux](../metal/roles/lxd/tasks/main.yml#L1) - [(optimize) Make parent interface a variable](../infra/modules/vpn/main.tf#L38) - [(optimize) Use btrfs in k8s 1.19.8 https](../metal/roles/lxd/templates/leader.yaml.j2#L17) -- [(optimize) Use metal values for MetalLB values](../infra/modules/kubernetes-cluster-bootstrap/values/metallb.yaml#L6) +- [(optimize) Use metal values for MetalLB values](../infra/modules/kubernetes-bootstrap/values/metallb.yaml#L6) - [(optimize) Use template for tfvars](../metal/roles/lxd/tasks/main.yml#L38) +- [Simple script to backup everything](../scripts/backup.sh#L3) +- [Simple script to restore everything](../scripts/restore.sh#L3) diff --git a/infra/modules/kubernetes-cluster/ansible/main.yml 
b/infra/modules/kubernetes-cluster/ansible/main.yml new file mode 100644 index 00000000..09d5e049 --- /dev/null +++ b/infra/modules/kubernetes-cluster/ansible/main.yml @@ -0,0 +1,9 @@ +- hosts: all + gather_facts: no + tasks: + - name: Wait for SSH connection + wait_for_connection: + delay: 10 + - name: Wait for Docker + wait_for: + path: /var/run/docker.sock diff --git a/infra/modules/kubernetes-cluster/main.tf b/infra/modules/kubernetes-cluster/main.tf new file mode 100644 index 00000000..d1973a99 --- /dev/null +++ b/infra/modules/kubernetes-cluster/main.tf @@ -0,0 +1,179 @@ +resource "tls_private_key" "ssh" { + algorithm = "ECDSA" + ecdsa_curve = "P256" +} + +resource "local_file" "ssh_private_key" { + content = tls_private_key.ssh.private_key_pem + filename = "${path.module}/private.pem" + file_permission = "0600" +} + +resource "lxd_profile" "kubenode" { + name = "kubenode" + + config = { + "limits.cpu" = 2 + "limits.memory" = "4GiB" + "limits.memory.swap" = false + # "security.nesting" = true + # "security.privileged" = true + # "linux.kernel_modules" = "ip_tables,ip6_tables,nf_nat,overlay,br_netfilter" + # "raw.lxc" = <<-EOT + # lxc.apparmor.profile=unconfined + # lxc.cap.drop= + # lxc.cgroup.devices.allow=a + # lxc.mount.auto=proc:rw sys:rw cgroup:rw + # EOT + "user.user-data" = <<-EOT + #cloud-config + ssh_authorized_keys: + - ${tls_private_key.ssh.public_key_openssh} + disable_root: false + runcmd: + - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - + - add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" + - apt-get update -y + - apt-get install -y docker-ce docker-ce-cli containerd.io open-iscsi + - mkdir -p /etc/systemd/system/docker.service.d/ + - printf "[Service]\nMountFlags=shared" > /etc/systemd/system/docker.service.d/mount_flags.conf + - mount --make-rshared / + - systemctl enable --now docker + - systemctl enable --now open-iscsi + EOT + } + + # # 
echo "262144" > /sys/module/nf_conntrack/parameters/hashsize + # device { + # type = "disk" + # name = "hashsize" + + # properties = { + # source = "/sys/module/nf_conntrack/parameters/hashsize" + # path = "/sys/module/nf_conntrack/parameters/hashsize" + # } + # } + + # device { + # type = "unix-char" + # name = "kmsg" + + # properties = { + # source = "/dev/kmsg" + # path = "/dev/kmsg" + # } + # } + + device { + name = "eth0" + type = "nic" + + properties = { + nictype = "macvlan" + parent = "eno1" + } + } + + device { + type = "disk" + name = "root" + + properties = { + pool = "default" + path = "/" + size = "32GiB" + } + } +} + +# TODO (optimize) DRY master and worker definition +resource "lxd_container" "masters" { + count = 3 + name = "master-${count.index}" + image = "ubuntu:20.04" + # TODO (bug) Use containers instead of virtual machines for Kubernetes nodes https://bugs.launchpad.net/ubuntu/+source/lxc/+bug/1226855 + type = "virtual-machine" + ephemeral = false + + profiles = [lxd_profile.kubenode.name] + + config = { + # TODO (bug) Should be posible to put it in the profile instead lxd_profile.kubenode.config, and make it a variable + # https://github.com/terraform-lxd/terraform-provider-lxd/blob/master/lxd/resource_lxd_container.go#L473 + "user.access_interface" = "enp5s0" + } +} + +resource "lxd_container" "workers" { + count = 3 + name = "worker-${count.index}" + image = "ubuntu:20.04" + type = "virtual-machine" + ephemeral = false + + profiles = [lxd_profile.kubenode.name] + + config = { + "user.access_interface" = "enp5s0" + } +} + +module "ansible_provisioner" { + source = "../ansible-provisioner" + directory = "${path.module}/ansible" + private_key = local_file.ssh_private_key.filename + inventory = concat( + lxd_container.masters.*.ip_address, + lxd_container.workers.*.ip_address + ) +} + +resource "rke_cluster" "cluster" { + dynamic "nodes" { + for_each = lxd_container.masters + + content { + address = nodes.value.ip_address + user = "root" + role 
= [ + "controlplane", + "etcd" + ] + ssh_key = tls_private_key.ssh.private_key_pem + } + } + + dynamic "nodes" { + for_each = lxd_container.workers + + content { + address = nodes.value.ip_address + user = "root" + role = [ + "worker" + ] + ssh_key = tls_private_key.ssh.private_key_pem + } + } + + ingress { + provider = "none" + } + + ignore_docker_version = true + + depends_on = [ + module.ansible_provisioner + ] +} + +resource "local_file" "kube_config_yaml" { + filename = "${path.root}/kube_config.yaml" + sensitive_content = rke_cluster.cluster.kube_config_yaml + file_permission = "0600" +} + +module "cluster_bootstrap" { + source = "../kubernetes-bootstrap" + kube_config = rke_cluster.cluster.kube_config_yaml +} diff --git a/infra/modules/kubernetes-cluster/terraform.tf b/infra/modules/kubernetes-cluster/terraform.tf new file mode 100644 index 00000000..f12a9652 --- /dev/null +++ b/infra/modules/kubernetes-cluster/terraform.tf @@ -0,0 +1,18 @@ +terraform { + required_providers { + lxd = { + source = "terraform-lxd/lxd" + version = "1.5.0" + } + + rke = { + source = "rancher/rke" + version = "1.1.7" + } + + helm = { + source = "hashicorp/helm" + version = "2.1.1" + } + } +} diff --git a/infra/platform.tf b/infra/platform.tf index 3ead7fbb..5edbeafe 100644 --- a/infra/platform.tf +++ b/infra/platform.tf @@ -1,179 +1,3 @@ -resource "tls_private_key" "ssh" { - algorithm = "ECDSA" - ecdsa_curve = "P256" -} - -resource "local_file" "ssh_private_key" { - content = tls_private_key.ssh.private_key_pem - filename = "${path.module}/private.pem" - file_permission = "0600" -} - -resource "lxd_profile" "kubenode" { - name = "kubenode" - - config = { - "limits.cpu" = 2 - "limits.memory" = "4GiB" - "limits.memory.swap" = false - # "security.nesting" = true - # "security.privileged" = true - # "linux.kernel_modules" = "ip_tables,ip6_tables,nf_nat,overlay,br_netfilter" - # "raw.lxc" = <<-EOT - # lxc.apparmor.profile=unconfined - # lxc.cap.drop= - # lxc.cgroup.devices.allow=a - # 
lxc.mount.auto=proc:rw sys:rw cgroup:rw - # EOT - "user.user-data" = <<-EOT - #cloud-config - ssh_authorized_keys: - - ${tls_private_key.ssh.public_key_openssh} - disable_root: false - runcmd: - - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - - - add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" - - apt-get update -y - - apt-get install -y docker-ce docker-ce-cli containerd.io open-iscsi - - mkdir -p /etc/systemd/system/docker.service.d/ - - printf "[Service]\nMountFlags=shared" > /etc/systemd/system/docker.service.d/mount_flags.conf - - mount --make-rshared / - - systemctl enable --now docker - - systemctl enable --now open-iscsi - EOT - } - - # # echo "262144" > /sys/module/nf_conntrack/parameters/hashsize - # device { - # type = "disk" - # name = "hashsize" - - # properties = { - # source = "/sys/module/nf_conntrack/parameters/hashsize" - # path = "/sys/module/nf_conntrack/parameters/hashsize" - # } - # } - - # device { - # type = "unix-char" - # name = "kmsg" - - # properties = { - # source = "/dev/kmsg" - # path = "/dev/kmsg" - # } - # } - - device { - name = "eth0" - type = "nic" - - properties = { - nictype = "macvlan" - parent = "eno1" - } - } - - device { - type = "disk" - name = "root" - - properties = { - pool = "default" - path = "/" - size = "32GiB" - } - } -} - -# TODO (optimize) DRY master and worker definition -resource "lxd_container" "masters" { - count = 3 - name = "master-${count.index}" - image = "ubuntu:20.04" - # TODO (bug) Use containers instead of virtual machines for Kubernetes nodes https://bugs.launchpad.net/ubuntu/+source/lxc/+bug/1226855 - type = "virtual-machine" - ephemeral = false - - profiles = [lxd_profile.kubenode.name] - - config = { - # TODO (bug) Should be posible to put it in the profile instead lxd_profile.kubenode.config, and make it a variable - # 
https://github.com/terraform-lxd/terraform-provider-lxd/blob/master/lxd/resource_lxd_container.go#L473 - "user.access_interface" = "enp5s0" - } - - provisioner "local-exec" { - command = "ansible all -u root --private-key ${local_file.ssh_private_key.filename} -i ${self.ip_address}, -m 'wait_for_connection' -a '' && ansible all -u root -i ${self.ip_address}, -m 'wait_for' -a 'path=/var/run/docker.sock'" - environment = { - ANSIBLE_HOST_KEY_CHECKING = "False" - } - } -} - -resource "lxd_container" "workers" { - count = 3 - name = "worker-${count.index}" - image = "ubuntu:20.04" - type = "virtual-machine" - ephemeral = false - - profiles = [lxd_profile.kubenode.name] - - config = { - "user.access_interface" = "enp5s0" - } - - provisioner "local-exec" { - command = "ansible all -u root --private-key ${local_file.ssh_private_key.filename} -i ${self.ip_address}, -m 'wait_for_connection' -a '' && ansible all -u root -i ${self.ip_address}, -m 'wait_for' -a 'path=/var/run/docker.sock'" - environment = { - ANSIBLE_HOST_KEY_CHECKING = "False" - } - } -} - -resource "rke_cluster" "cluster" { - dynamic "nodes" { - for_each = lxd_container.masters - - content { - address = nodes.value.ip_address - user = "root" - role = [ - "controlplane", - "etcd" - ] - ssh_key = tls_private_key.ssh.private_key_pem - } - } - - dynamic "nodes" { - for_each = lxd_container.workers - - content { - address = nodes.value.ip_address - user = "root" - role = [ - "worker" - ] - ssh_key = tls_private_key.ssh.private_key_pem - } - } - - ingress { - provider = "none" - } - - ignore_docker_version = true -} - -resource "local_file" "kube_config_yaml" { - filename = "${path.root}/kube_config.yaml" - sensitive_content = rke_cluster.cluster.kube_config_yaml - file_permission = "0600" -} - -module "rke_cluster_bootstrap" { - source = "./modules/kubernetes-bootstrap" - kube_config = rke_cluster.cluster.kube_config_yaml +module "kubernetes_cluster" { + source = "./modules/kubernetes-cluster" } diff --git 
a/infra/terraform.tf b/infra/terraform.tf index 7785cae9..a769f06d 100644 --- a/infra/terraform.tf +++ b/infra/terraform.tf @@ -5,6 +5,7 @@ terraform { lock = true } + # TODO Better Terraform provider inheritance required_providers { lxd = { source = "terraform-lxd/lxd" @@ -15,6 +16,11 @@ terraform { source = "rancher/rke" version = "1.1.7" } + + helm = { + source = "hashicorp/helm" + version = "2.1.1" + } } }