diff --git a/.gitignore b/.gitignore index 0bde46f4..cc02536e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ terraform-*-tmp-*/ *.log .terraform* backend.tfvars +ignition.json kube_config.yaml terraform.tfstate* terraform.tfvars diff --git a/README.md b/README.md index dce60314..9e6c7d30 100644 --- a/README.md +++ b/README.md @@ -115,3 +115,4 @@ Distributed under the GPLv3 License. See `LICENSE` for more information. - ArgoCD usage in [my coworker's homelab](https://github.com/locmai/humble) - [README template](https://github.com/othneildrew/Best-README-Template) - [Run the same Cloudflare Tunnel across many `cloudflared` processes](https://developers.cloudflare.com/cloudflare-one/tutorials/many-cfd-one-tunnel) +- [MAC address environment variable in GRUB config](https://askubuntu.com/questions/1272400/how-do-i-automate-network-installation-of-many-ubuntu-18-04-systems-with-efi-and) diff --git a/docs/todo.md b/docs/todo.md index 12f28ab7..e1031515 100644 --- a/docs/todo.md +++ b/docs/todo.md @@ -1,10 +1,8 @@ # TODO -- [(bug) Fix only 15GiB root partition](../metal/roles/pxe-boot/templates/http/kickstart/fedora.ks.j2#L20) - [(feature) Add lint checks for everything](../Makefile#L29) - [(feature) Simple script to backup everything](../scripts/backup.sh#L3) - [(feature) Simple script to restore everything](../scripts/restore.sh#L3) -- [(optimize) Node firewall](../metal/roles/docker/tasks/main.yml#L1) -- [(optimize) Restructure provisioning roles](../metal/roles/docker/tasks/main.yml#L18) +- [(optimize) Get timezone automatically from the controller](../metal/roles/pxe-boot/defaults/main.yml#L7) - [(optimize) Use metal values for MetalLB values](../apps/resources/metallb.yaml#L23) - [(optimize) Use reflector to generate mirrorlist dynamically](../tools/Dockerfile#L3) diff --git a/infra/README.md b/infra/README.md index bedb246e..a9a5d8ff 100644 --- a/infra/README.md +++ b/infra/README.md @@ -1 +1,9 @@ # Private cloud infrastructure + +## Kubernetes cluster `./cluster.tf` + +Using Rancher Kubernetes Engine, with some config specific to CoreOS + +## Bootstrap `./bootstrap.tf` + +Install some essential Helm charts (network, storage,...) diff --git a/infra/cluster.tf b/infra/cluster.tf index 303d86e1..09827acc 100644 --- a/infra/cluster.tf +++ b/infra/cluster.tf @@ -46,6 +46,23 @@ resource "rke_cluster" "cluster" { ingress { provider = "none" } + + # For CoreOS + network { + plugin = "canal" + options = { + canal_flex_volume_plugin_dir = "/opt/kubernetes/kubelet-plugins/volume/exec/nodeagent~uds" + flannel_backend_type = "vxlan" + } + } + + services { + kube_controller { + extra_args = { + flex-volume-plugin-dir = "/opt/kubernetes/kubelet-plugins/volume/exec/" + } + } + } } resource "local_file" "kube_config_yaml" { diff --git a/metal/hosts.yml b/metal/hosts.yml index e20cd0bf..e848d2a4 100644 --- a/metal/hosts.yml +++ b/metal/hosts.yml @@ -4,10 +4,10 @@ controller: metal: hosts: - metal0: {ansible_host: 192.168.1.110, mac: 00-23-24-d1-f4-d6} - metal1: {ansible_host: 192.168.1.111, mac: 00-23-24-e7-04-60} - metal2: {ansible_host: 192.168.1.112, mac: 00-23-24-d1-f5-69} - metal3: {ansible_host: 192.168.1.113, mac: 00-23-24-d1-f3-f0} + metal0: {ansible_host: 192.168.1.110, mac: '00:23:24:d1:f4:d6'} + metal1: {ansible_host: 192.168.1.111, mac: '00:23:24:e7:04:60'} + metal2: {ansible_host: 192.168.1.112, mac: '00:23:24:d1:f5:69'} + metal3: {ansible_host: 192.168.1.113, mac: '00:23:24:d1:f3:f0'} vars: ansible_user: root ansible_ssh_private_key_file: ~/.ssh/id_ed25519 diff --git a/metal/main.yml b/metal/main.yml index 3924c17f..b2a3574e 100644 --- a/metal/main.yml +++ b/metal/main.yml @@ -4,12 +4,8 @@ roles: - pxe-boot -- name: Provision - hosts: metal - roles: - - docker - - name: Create Terraform state storage hosts: metal[0] + gather_facts: no roles: - tfstate diff --git a/metal/roles/docker/tasks/main.yml b/metal/roles/docker/tasks/main.yml deleted file mode 100644 index 9d817b55..00000000 --- a/metal/roles/docker/tasks/main.yml +++ /dev/null @@ -1,9 +0,0 @@ -- name: Install Docker - dnf: - name: docker - -- name: Start Docker service - systemd: - name: docker - state: started - enabled: yes diff --git a/metal/roles/pxe-boot/defaults/main.yml b/metal/roles/pxe-boot/defaults/main.yml index 9f30ae14..f465f79b 100644 --- a/metal/roles/pxe-boot/defaults/main.yml +++ b/metal/roles/pxe-boot/defaults/main.yml @@ -1,5 +1,8 @@ -iso_url: "https://download.fedoraproject.org/pub/fedora/linux/releases/34/Server/x86_64/iso/Fedora-Server-dvd-x86_64-34-1.2.iso" -iso_checksum: "sha256:0b9dc87d060c7c4ef89f63db6d4d1597dd3feaf4d635ca051d87f5e8c89e8675" -iso_file_name: "Fedora-Server-dvd-x86_64-34-1.2.iso" +iso_url: "https://builds.coreos.fedoraproject.org/prod/streams/stable/builds/34.20210518.3.0/x86_64/fedora-coreos-34.20210518.3.0-live.x86_64.iso" +iso_checksum: "sha256:acbf3cd5b25da7a1fb6862485a6a38064dd8f14dc0398137d371abb9b148eaed" +iso_file_name: "fedora-coreos-34.20210518.3.0-live.x86_64.iso" os_username: admin +os_name: CoreOS +# TODO (optimize) Get timezone automatically from the controller +timezone: Asia/Ho_Chi_Minh diff --git a/metal/roles/pxe-boot/handlers/main.yml b/metal/roles/pxe-boot/handlers/main.yml index 3f7fddfc..5aefdd77 100644 --- a/metal/roles/pxe-boot/handlers/main.yml +++ b/metal/roles/pxe-boot/handlers/main.yml @@ -1,4 +1,4 @@ -- name: Stop PXE server +- name: Stop ephemeral PXE server docker_compose: project_src: "{{ role_path }}/build" state: absent diff --git a/metal/roles/pxe-boot/tasks/server.yml b/metal/roles/pxe-boot/tasks/server.yml index 8018d41f..42813aa7 100644 --- a/metal/roles/pxe-boot/tasks/server.yml +++ b/metal/roles/pxe-boot/tasks/server.yml @@ -25,6 +25,13 @@ cmd: "xorriso -osirrox on -indev {{ iso.dest }} -extract / {{ role_path }}/build/mnt" creates: "{{ role_path }}/build/mnt/.treeinfo" +- name: Extract bootloader + iso_extract: + image: "{{ role_path }}/build/mnt/images/efiboot.img" + dest: "{{ role_path }}/build/mnt/EFI/fedora" + files: + - EFI/fedora/grubx64.efi + - name: Copy configs copy: src: "{{ role_path }}/templates/" @@ -40,18 +47,29 @@ src: tftp/tftpboot/grub.cfg.j2 dest: "{{ role_path }}/build/tftp/tftpboot/grub.cfg" -- name: Render shared kickstart config +- name: Render machine specific Butane config template: - src: http/kickstart/fedora.ks.j2 - dest: "{{ role_path }}/build/http/kickstart/fedora.ks" - -- name: Render seperate network kickstart config for each node - template: - src: http/kickstart/network/network.ks.j2 - dest: "{{ role_path }}/build/http/kickstart/network/{{ hostvars[item]['mac'] }}.ks" + src: http/ignition/ignition.yaml.j2 + dest: "{{ role_path }}/build/http/ignition/{{ hostvars[item]['mac'] }}.yaml" loop: "{{ groups['metal'] }}" -- name: Start PXE server +- name: Render Ignition config from Butane config + docker_container: + name: butane + image: quay.io/coreos/butane:release + auto_remove: yes + volumes: + - "{{ role_path }}/build/http/ignition:/local/src" + working_dir: /local/src + command: + - --pretty + - --strict + - "{{ hostvars[item]['mac'] }}.yaml" + - --output + - "{{ hostvars[item]['mac'] }}.json" + loop: "{{ groups['metal'] }}" + +- name: Start ephemeral PXE server docker_compose: project_src: "{{ role_path }}/build" state: present @@ -59,4 +77,4 @@ build: yes recreate: always notify: - - Stop PXE server + - Stop ephemeral PXE server diff --git a/metal/roles/pxe-boot/tasks/wake.yml b/metal/roles/pxe-boot/tasks/wake.yml index ebd8e736..e9eaa8d7 100644 --- a/metal/roles/pxe-boot/tasks/wake.yml +++ b/metal/roles/pxe-boot/tasks/wake.yml @@ -4,5 +4,8 @@ delegate_to: localhost - name: Wait for the servers to comes up - wait_for_connection: - timeout: 600 + wait_for: + host: '{{ ansible_host }}' + port: 22 + search_regex: OpenSSH + connection: local diff --git a/metal/roles/pxe-boot/templates/docker-compose.yml b/metal/roles/pxe-boot/templates/docker-compose.yml index 3f4da50e..d4061f08 100644 --- a/metal/roles/pxe-boot/templates/docker-compose.yml +++ b/metal/roles/pxe-boot/templates/docker-compose.yml @@ -8,13 +8,15 @@ services: build: ./tftp network_mode: host volumes: - - ./mnt/EFI/BOOT/grubx64.efi:/var/lib/tftpboot/grubx64.efi + - ./mnt/EFI/fedora/grubx64.efi:/var/lib/tftpboot/grubx64.efi + - ./mnt/images/ignition.img:/var/lib/tftpboot/ignition.img - ./mnt/images/pxeboot/initrd.img:/var/lib/tftpboot/initrd.img - ./mnt/images/pxeboot/vmlinuz:/var/lib/tftpboot/vmlinuz http: build: ./http network_mode: host volumes: - - ./mnt:/usr/share/nginx/html/Fedora + - ./mnt:/usr/share/nginx/html/CoreOS + - ./http/ignition/:/usr/share/nginx/html/ignition environment: NGINX_PORT: 80 diff --git a/metal/roles/pxe-boot/templates/http/Dockerfile b/metal/roles/pxe-boot/templates/http/Dockerfile index c90eeb7d..990f0065 100644 --- a/metal/roles/pxe-boot/templates/http/Dockerfile +++ b/metal/roles/pxe-boot/templates/http/Dockerfile @@ -1,3 +1 @@ FROM nginx:1.19-alpine - -COPY ./kickstart /usr/share/nginx/html/kickstart diff --git a/metal/roles/pxe-boot/templates/http/ignition/ignition.yaml.j2 b/metal/roles/pxe-boot/templates/http/ignition/ignition.yaml.j2 new file mode 100644 index 00000000..29b85734 --- /dev/null +++ b/metal/roles/pxe-boot/templates/http/ignition/ignition.yaml.j2 @@ -0,0 +1,64 @@ +variant: fcos +version: 1.3.0 + +passwd: + users: + - name: root + ssh_authorized_keys: + - {{ ssh_public_key }} + +storage: + # Set hostname + files: + - path: /etc/hostname + mode: 0644 + contents: + inline: {{ hostvars[item]['inventory_hostname'] }} + - path: /etc/NetworkManager/system-connections/{{ network_interface }}.nmconnection + mode: 0600 + contents: + inline: | + [connection] + id={{ network_interface }} + type=ethernet + interface-name={{ network_interface }} + permissions= + [ipv4] + # TODO + address1={{ hostvars[item]['ansible_host'] }}/24,{{ ansible_default_ipv4.gateway }} + dns={{ dns_server }}; + dns-search= + method=manual + # Set timezone + links: + - path: /etc/localtime + target: /usr/share/zoneinfo/{{ timezone }} + +systemd: + units: + - name: iscsid.service + enabled: true +{% if item == "metal0" %} + - name: tfstate.service + enabled: true + contents: | + [Unit] + Description=Run etcd for Terraform state backend + After=network-online.target + Wants=network-online.target + + [Service] + ExecStartPre=-/usr/bin/docker kill tfstate + ExecStartPre=-/usr/bin/docker rm tfstate + ExecStart=/usr/bin/docker run --name tfstate \ + --volume tfstate:/bitnami/etcd/data \ + --env ALLOW_NONE_AUTHENTICATION=yes \ + --publish 23799:2379 \ + --restart always \ + bitnami/etcd + ExecStop=/usr/bin/docker stop tfstate + Restart=always + + [Install] + WantedBy=multi-user.target +{% endif %} diff --git a/metal/roles/pxe-boot/templates/http/kickstart/fedora.ks.j2 b/metal/roles/pxe-boot/templates/http/kickstart/fedora.ks.j2 deleted file mode 100644 index ab3a47be..00000000 --- a/metal/roles/pxe-boot/templates/http/kickstart/fedora.ks.j2 +++ /dev/null @@ -1,55 +0,0 @@ -#version=DEVEL - -%pre --interpreter=/bin/sh -mac=$(ip --brief link show dev {{ network_interface }} | tr -s ' ' | cut -d ' ' -f 3 | sed 's/:/-/g') -curl "http://{{ ansible_default_ipv4.address }}/kickstart/network/$mac.ks" > /tmp/network.ks -%end - -# Do not use graphical install -text - -# Keyboard layouts -keyboard --xlayouts='us' -# System language -lang en_US.UTF-8 - -# Partition clearing information -clearpart --all --drives={{ disk }} -# Partitioning -ignoredisk --only-use={{ disk }} -# TODO (bug) Fix only 15GiB root partition -autopart - -# Network information -%include /tmp/network.ks - -# Use network installation -url --url="http://{{ ansible_default_ipv4.address }}/Fedora/" -# Disable Setup Agent on first boot -firstboot --disable -# Do not configure the X Window System -skipx -# System services -services --enabled="chronyd" -# System timezone -timezone Asia/Ho_Chi_Minh --utc - -# Create user (locked by default) -user --groups=wheel --name={{ os_username }} -# Add SSH key -sshkey --username=root "{{ ssh_public_key }}" - -# SELinux -selinux --disabled - -# Firewall -firewall --disabled - -%packages -@^server-product-environment -%end - -# Enable some services for Kubernetes -services --enable=iscsid - -reboot diff --git a/metal/roles/pxe-boot/templates/http/kickstart/network/network.ks.j2 b/metal/roles/pxe-boot/templates/http/kickstart/network/network.ks.j2 deleted file mode 100644 index dfa41d01..00000000 --- a/metal/roles/pxe-boot/templates/http/kickstart/network/network.ks.j2 +++ /dev/null @@ -1 +0,0 @@ -network --bootproto=static --device={{ network_interface }} --ip={{ hostvars[item]['ansible_host'] }} --gateway={{ ansible_default_ipv4.gateway }} --nameserver={{ dns_server }} --netmask={{ ansible_default_ipv4.netmask }} --ipv6=auto --hostname={{ hostvars[item]['inventory_hostname'] }} --activate diff --git a/metal/roles/pxe-boot/templates/tftp/tftpboot/grub.cfg.j2 b/metal/roles/pxe-boot/templates/tftp/tftpboot/grub.cfg.j2 index 3e70d638..6a79aa9b 100644 --- a/metal/roles/pxe-boot/templates/tftp/tftpboot/grub.cfg.j2 +++ b/metal/roles/pxe-boot/templates/tftp/tftpboot/grub.cfg.j2 @@ -1,6 +1,11 @@ -set timeout=5 +set timeout=1 -menuentry 'Fedora' { - linuxefi vmlinuz ip=dhcp inst.repo=http://{{ ansible_default_ipv4.address }}/Fedora inst.ks=http://{{ ansible_default_ipv4.address }}/kickstart/fedora.ks - initrdefi initrd.img +menuentry '{{ os_name }} (Live)' { + linux vmlinuz \ + ip=dhcp \ + ignition.platform.id=metal \ + coreos.live.rootfs_url=http://{{ ansible_default_ipv4.address }}/{{ os_name }}/images/pxeboot/rootfs.img \ + coreos.inst.install_dev=/dev/{{ disk }} \ + coreos.inst.ignition_url=http://{{ ansible_default_ipv4.address }}/ignition/${net_default_mac}.json + initrd initrd.img ignition.img } diff --git a/metal/roles/tfstate/tasks/main.yml b/metal/roles/tfstate/tasks/main.yml index aa82cccb..6507f6bd 100644 --- a/metal/roles/tfstate/tasks/main.yml +++ b/metal/roles/tfstate/tasks/main.yml @@ -1,18 +1,8 @@ -- name: Install Python library for the Docker - dnf: - name: python-docker - -- name: Create etcd container - docker_container: - name: etcd-tfstate - image: bitnami/etcd - restart_policy: always - env: - ALLOW_NONE_AUTHENTICATION: "yes" - published_ports: - - "{{ etcd_port }}:2379" - volumes: - - tfstate:/bitnami/etcd/data +- name: Wait for etcd + wait_for: + port: 23799 + host: '{{ ansible_ssh_host }}' + connection: local - name: Generate Terraform backend config delegate_to: localhost diff --git a/metal/roles/tfstate/templates/backend.tfvars.j2 b/metal/roles/tfstate/templates/backend.tfvars.j2 index 661495fb..4a285b08 100644 --- a/metal/roles/tfstate/templates/backend.tfvars.j2 +++ b/metal/roles/tfstate/templates/backend.tfvars.j2 @@ -1,5 +1,5 @@ endpoints = [ {% for host in ansible_play_hosts %} - "{{ hostvars[host].ansible_default_ipv4.address }}:{{ etcd_port }}", + "{{ hostvars[host].ansible_host }}:{{ etcd_port }}", {% endfor %} ] diff --git a/tools/Dockerfile b/tools/Dockerfile index 5ef5207c..68be9a19 100644 --- a/tools/Dockerfile +++ b/tools/Dockerfile @@ -13,6 +13,7 @@ RUN yes | pacman --sync --refresh \ kubectl \ libisoburn \ make \ + p7zip \ python \ python-netaddr \ terraform \