From b4ba7ea6e2e80aa205e4de536c99308fa4cdde0b Mon Sep 17 00:00:00 2001 From: Khue Doan Date: Sun, 3 Mar 2024 10:16:49 +0700 Subject: [PATCH] refactor!: replace Longhorn with Rook Ceph Longhorn is too unreliable for some reason. --- .../pxe_server/templates/kickstart.ks.j2 | 20 +++++++++++++++---- system/longhorn-system/Chart.yaml | 7 ------- .../templates/servicemonitor.yaml | 17 ---------------- system/longhorn-system/values.yaml | 6 ------ system/rook-ceph/Chart.yaml | 10 ++++++++++ system/rook-ceph/values.yaml | 2 ++ 6 files changed, 28 insertions(+), 34 deletions(-) delete mode 100644 system/longhorn-system/Chart.yaml delete mode 100644 system/longhorn-system/templates/servicemonitor.yaml delete mode 100644 system/longhorn-system/values.yaml create mode 100644 system/rook-ceph/Chart.yaml create mode 100644 system/rook-ceph/values.yaml diff --git a/metal/roles/pxe_server/templates/kickstart.ks.j2 b/metal/roles/pxe_server/templates/kickstart.ks.j2 index ae3f4e24..d630fb5a 100644 --- a/metal/roles/pxe_server/templates/kickstart.ks.j2 +++ b/metal/roles/pxe_server/templates/kickstart.ks.j2 @@ -13,7 +13,7 @@ clearpart --all --drives={{ hostvars[item]['disk'] }} # Partitioning ignoredisk --only-use={{ hostvars[item]['disk'] }} partition /boot/efi --fstype=vfat --size=512 -partition / --fstype=ext4 --grow +partition / --fstype=ext4 --size=32768 # Network information network --bootproto=static --device={{ hostvars[item]['network_interface'] }} --ip={{ hostvars[item]['ansible_host'] }} --gateway={{ ansible_default_ipv4.gateway }} --nameserver={{ dns_server }} --netmask={{ ansible_default_ipv4.netmask }} --ipv6=auto --hostname={{ hostvars[item]['inventory_hostname'] }} --activate @@ -46,10 +46,22 @@ firewall --disabled %packages @^custom-environment openssh-server -iscsi-initiator-utils %end -# Enable iSCSI for Kubernetes storage -services --enable=iscsid +# Create a raw partition for Ceph using the remaining space +# Using a post script because there is no built-in feature in Kickstart +# The three empty lines are equivalent to pressing Enter to use the default values for: +# - Partition number +# - First sector +# - Last sector +%post +fdisk /dev/{{ hostvars[item]['disk'] }} << EOF +new + + + +write +EOF +%end reboot diff --git a/system/longhorn-system/Chart.yaml b/system/longhorn-system/Chart.yaml deleted file mode 100644 index 68288396..00000000 --- a/system/longhorn-system/Chart.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: v2 -name: longhorn -version: 0.0.0 -dependencies: - - name: longhorn - version: 1.6.0 - repository: https://charts.longhorn.io diff --git a/system/longhorn-system/templates/servicemonitor.yaml b/system/longhorn-system/templates/servicemonitor.yaml deleted file mode 100644 index 58a1bada..00000000 --- a/system/longhorn-system/templates/servicemonitor.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# TODO alert rules following https://longhorn.io/docs/1.1.0/monitoring/prometheus_and_grafana_setup/ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: {{ .Release.Name }} - namespace: {{ .Release.Namespace }} - annotations: - argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true -spec: - selector: - matchLabels: - app: longhorn-manager - namespaceSelector: - matchNames: - - {{ .Release.Namespace }} - endpoints: - - port: manager diff --git a/system/longhorn-system/values.yaml b/system/longhorn-system/values.yaml deleted file mode 100644 index ec3ba4ee..00000000 --- a/system/longhorn-system/values.yaml +++ /dev/null @@ -1,6 +0,0 @@ -longhorn: - defaultSettings: - nodeDownPodDeletionPolicy: delete-both-statefulset-and-deployment-pod - persistence: - # If you have three or more nodes for storage, use 3; otherwise use 2 - defaultClassReplicaCount: 2 # TODO run DR test to see if we actually need 3 diff --git a/system/rook-ceph/Chart.yaml b/system/rook-ceph/Chart.yaml new file mode 100644 index 00000000..5110c416 --- /dev/null +++ b/system/rook-ceph/Chart.yaml @@ -0,0 +1,10 @@ +apiVersion: v2 +name: rook-ceph +version: 0.0.0 +dependencies: + - name: rook-ceph + version: 1.13.5 + repository: https://charts.rook.io/release + - name: rook-ceph-cluster + version: 1.13.5 + repository: https://charts.rook.io/release diff --git a/system/rook-ceph/values.yaml b/system/rook-ceph/values.yaml new file mode 100644 index 00000000..a7198eaa --- /dev/null +++ b/system/rook-ceph/values.yaml @@ -0,0 +1,2 @@ +rook-ceph: {} +rook-ceph-cluster: {}