Cluster bringup
szinn committed Jan 14, 2025
1 parent a1325cc commit 0a3f75f
Showing 23 changed files with 180 additions and 154 deletions.
118 changes: 49 additions & 69 deletions .taskfiles/Bootstrap/Taskfile.yaml
@@ -18,7 +18,7 @@ tasks:
- task: merge
vars:
cluster: main
- task: deploy-base
- task: deploy
vars:
cluster: main

@@ -37,10 +37,7 @@ tasks:
- task: merge
vars:
cluster: staging
- task: deploy-base
vars:
cluster: staging
- task: deploy-cluster
- task: deploy
vars:
cluster: staging

@@ -64,53 +61,12 @@ tasks:
- chmod og-rwx $HOME/.kube/config
- chmod og-rwx $HOME/.talos/config

dump:
desc: Dump resources
preconditions:
- which helmfile kubectl
- test -f "${TALOSCONFIG}"
- test -f {{.CLUSTER_DIR}}/bootstrap/helmfile.yaml
- test -f {{.CLUSTER_DIR}}/bootstrap/templates/resources.yaml.j2
- talosctl --context {{.cluster}} config info
requires:
vars:
- cluster
env:
TALOSCONFIG: "{{.CLUSTER_DIR}}/talosconfig"
vars:
CLUSTER_DIR: "{{.KUBERNETES_DIR}}/{{.cluster}}"
cmds:
- op run --env-file {{.CLUSTER_DIR}}/bootstrap/bootstrap.env --no-masking -- minijinja-cli "{{.CLUSTER_DIR}}/bootstrap/templates/resources.yaml.j2"

# NOTE: Nodes must all be part of the Ceph cluster
rook:
desc: Bootstrap Rook-Ceph [cluster={{.cluster}}]
preconditions:
- which kubectl talosctl
- talosctl config info
requires:
vars:
- cluster
vars:
BLUESTORE_DISKS_RAW:
sh: talosctl --context {{.cluster}} get discoveredvolumes -o json | jq -r 'select(.spec.type=="disk" and .spec.name=="bluestore") | {"node":.node, "disk":.spec.dev_path}' | jq -crs '.'
BLUESTORE_DISKS:
ref: "fromJson .BLUESTORE_DISKS_RAW"
cmds:
- for:
var: BLUESTORE_DISKS
vars:
NODE:
sh: kubectl --context {{.cluster}} get nodes -o json | jq -r '.items[] | select(.status.addresses[].address=="{{.ITEM.node}}") | .metadata.name'
DISK: "{{ .ITEM.disk }}"
task: :rook:wipe-disk

deploy-base:
desc: Bootstrap Base [cluster={{.cluster}}]
deploy:
desc: Bootstrap [cluster={{.cluster}}]
preconditions:
- which op helmfile kubectl
- test -f "${TALOSCONFIG}"
- test -f {{.CLUSTER_DIR}}/bootstrap/helmfile-base.yaml
- test -f {{.CLUSTER_DIR}}/bootstrap/helmfile.yaml
- test -f {{.CLUSTER_DIR}}/bootstrap/templates/resources.yaml.j2
- op user get --me
- talosctl --context {{.cluster}} config info
@@ -119,31 +75,14 @@
- cluster
env:
TALOSCONFIG: "{{.CLUSTER_DIR}}/talosconfig"
NODE_COUNT:
sh: kubectl --context {{.context}} get nodes --no-headers --selector !node-role.kubernetes.io/control-plane | wc
NODE_COUNT: 3
vars:
CLUSTER_DIR: "{{.KUBERNETES_DIR}}/{{.cluster}}"
cmds:
- until kubectl --context {{.cluster}} wait nodes --for=condition=Ready=False --all --timeout=10m; do sleep 5; done
- op run --env-file {{.CLUSTER_DIR}}/bootstrap/bootstrap.env --no-masking -- minijinja-cli "{{.CLUSTER_DIR}}/bootstrap/templates/resources.yaml.j2" | kubectl --context {{.cluster}} apply --server-side --filename -
- helmfile --kube-context {{.cluster}} --quiet --file {{.CLUSTER_DIR}}/bootstrap/helmfile-base.yaml apply --skip-diff-on-install --suppress-diff

deploy-cluster:
desc: Bootstrap Apps [cluster={{.cluster}}]
preconditions:
- which helmfile kubectl
- test -f "${TALOSCONFIG}"
- test -f {{.CLUSTER_DIR}}/bootstrap/helmfile-cluster.yaml
- talosctl --context {{.cluster}} config info
requires:
vars:
- cluster
env:
TALOSCONFIG: "{{.CLUSTER_DIR}}/talosconfig"
vars:
CLUSTER_DIR: "{{.KUBERNETES_DIR}}/{{.cluster}}"
cmds:
- helmfile --kube-context {{.cluster}} --quiet --file {{.CLUSTER_DIR}}/bootstrap/helmfile-cluster.yaml apply --skip-diff-on-install --suppress-diff
- helmfile --kube-context {{.cluster}} --quiet --file {{.CLUSTER_DIR}}/bootstrap/helmfile.yaml apply --skip-diff-on-install --suppress-diff
- helmfile --kube-context {{.cluster}} --quiet --file {{.CLUSTER_DIR}}/bootstrap/helmfile.yaml destroy --selector name=wipe-rook

get-certs:
desc: Fetch certificates from cluster
@@ -168,3 +107,44 @@
vars:
certs:
sh: 'echo {{if eq .cluster "main"}} "tech-wildcard wildcard" {{else}} "wildcard" {{end}}'

# dump:
# desc: Dump resources
# preconditions:
# - which helmfile kubectl
# - test -f "${TALOSCONFIG}"
# - test -f {{.CLUSTER_DIR}}/bootstrap/helmfile.yaml
# - test -f {{.CLUSTER_DIR}}/bootstrap/templates/resources.yaml.j2
# - talosctl --context {{.cluster}} config info
# requires:
# vars:
# - cluster
# env:
# TALOSCONFIG: "{{.CLUSTER_DIR}}/talosconfig"
# vars:
# CLUSTER_DIR: "{{.KUBERNETES_DIR}}/{{.cluster}}"
# cmds:
# - op run --env-file {{.CLUSTER_DIR}}/bootstrap/bootstrap.env --no-masking -- minijinja-cli "{{.CLUSTER_DIR}}/bootstrap/templates/resources.yaml.j2"

# NOTE: Nodes must all be part of the Ceph cluster
rook:
desc: Bootstrap Rook-Ceph [cluster={{.cluster}}]
preconditions:
- which kubectl talosctl
- talosctl config info
requires:
vars:
- cluster
vars:
BLUESTORE_DISKS_RAW:
sh: talosctl --context {{.cluster}} get discoveredvolumes -o json | jq -r 'select(.spec.type=="disk" and .spec.name=="bluestore") | {"node":.node, "disk":.spec.dev_path}' | jq -crs '.'
BLUESTORE_DISKS:
ref: "fromJson .BLUESTORE_DISKS_RAW"
cmds:
- for:
var: BLUESTORE_DISKS
vars:
NODE:
sh: kubectl --context {{.cluster}} get nodes -o json | jq -r '.items[] | select(.status.addresses[].address=="{{.ITEM.node}}") | .metadata.name'
DISK: "{{ .ITEM.disk }}"
task: :rook:wipe-disk
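
For reference, a minimal invocation sketch for the tasks above (the bootstrap: namespace prefix is an assumption about how the root Taskfile includes this file; cluster is passed as a go-task CLI variable):

task bootstrap:main                      # merge kube/talos configs, then run deploy for cluster "main"
task bootstrap:deploy cluster=staging    # run only the deploy step against the staging cluster
task bootstrap:rook cluster=main         # locate bluestore disks and hand each to :rook:wipe-disk
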
3 changes: 2 additions & 1 deletion .taskfiles/volsync/Taskfile.yaml
@@ -108,7 +108,8 @@ tasks:
# vars: *env
- kubectl --context {{.cluster}} -n {{.ns}} patch replicationsources {{.app}} --type merge -p '{"spec":{"trigger":{"manual":"{{.ts}}"}}}'
- bash {{.scriptsDir}}/wait-for-k8s-job.sh volsync-src-{{.app}} {{.ns}} {{.cluster}}
- kubectl --context {{.cluster}} -n {{.ns}} wait job/volsync-src-{{.app}} --for condition=complete --timeout=120m
- cmd: kubectl --context {{.cluster}} -n {{.ns}} wait job/volsync-src-{{.app}} --for condition=complete --timeout=120m
ignore_error: true
vars:
cluster: '{{ or .cluster (fail "Argument (cluster) is required") }}'
app: '{{ or .app (fail "Argument (app) is required") }}'
37 changes: 37 additions & 0 deletions hack/ubuntu.yaml
@@ -0,0 +1,37 @@
---
apiVersion: v1
kind: Pod
metadata:
name: ubuntu-shell
namespace: default
spec:
terminationGracePeriodSeconds: 5
# nodeName: k8s-5
containers:
- name: shell
image: docker.io/library/alpine
tty: true
stdin: true
securityContext:
privileged: true
volumeMounts:
- mountPath: /mnt/host_var
name: host-var
- mountPath: /dev/disk
name: host-dev-disk
# - mountPath: /config
# name: config
resources:
requests:
cpu: 100m
memory: 100Mi
limits:
cpu: 2000m
memory: 10Gi
volumes:
- name: host-var
hostPath:
path: /var
- name: host-dev-disk
hostPath:
path: /dev/disk
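
A minimal usage sketch for this debug pod (plain kubectl; note the image is alpine despite the ubuntu-shell name, so the shell inside is sh):

kubectl apply -f hack/ubuntu.yaml
kubectl -n default exec -it ubuntu-shell -- sh   # host /var and /dev/disk are mounted into the container
kubectl -n default delete pod ubuntu-shell       # 5-second grace period, so teardown is quick
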
44 changes: 22 additions & 22 deletions kubernetes/main/apps/dbms/cloudnative-pg/install.yaml
@@ -43,26 +43,26 @@ spec:
wait: false
interval: 30m
timeout: 5m
---
# ---
# yaml-language-server: $schema=https://kubernetes-schemas.zinn.ca/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app cloudnative-pg-backup
namespace: flux-system
spec:
targetNamespace: dbms
commonMetadata:
labels:
app.kubernetes.io/name: *app
path: ./kubernetes/main/apps/dbms/cloudnative-pg/backup
sourceRef:
kind: GitRepository
name: flux-system
dependsOn:
- name: cloudnative-pg
- name: external-secrets
prune: true
wait: false
interval: 30m
timeout: 5m
# apiVersion: kustomize.toolkit.fluxcd.io/v1
# kind: Kustomization
# metadata:
# name: &app cloudnative-pg-backup
# namespace: flux-system
# spec:
# targetNamespace: dbms
# commonMetadata:
# labels:
# app.kubernetes.io/name: *app
# path: ./kubernetes/main/apps/dbms/cloudnative-pg/backup
# sourceRef:
# kind: GitRepository
# name: flux-system
# dependsOn:
# - name: cloudnative-pg
# - name: external-secrets
# prune: true
# wait: false
# interval: 30m
# timeout: 5m
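
With the backup Kustomization commented out, Flux stops applying it; a quick check from the CLI (a sketch, assuming the flux CLI is installed and the kube context points at the main cluster):

flux get kustomizations -n flux-system | grep cloudnative-pg   # only the cloudnative-pg entry should remain
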
23 changes: 23 additions & 0 deletions kubernetes/main/apps/flux-system/flux-config/install.yaml
@@ -0,0 +1,23 @@
---
# yaml-language-server: $schema=https://kubernetes-schemas.zinn.ca/kustomize.toolkit.fluxcd.io/kustomization_v1.json
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: &app flux-config
namespace: flux-system
spec:
targetNamespace: flux-system
commonMetadata:
labels:
app.kubernetes.io/name: *app
path: ./kubernetes/main/apps/flux-system/flux-config/github
sourceRef:
kind: GitRepository
name: flux-system
dependsOn:
- name: flux-operator
- name: external-secrets
prune: true
wait: true
interval: 30m
timeout: 5m
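
Once merged, the new flux-config Kustomization can be reconciled and inspected directly (a sketch using standard flux CLI commands):

flux reconcile kustomization flux-config -n flux-system --with-source
flux get kustomizations flux-config -n flux-system
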
@@ -3,7 +3,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- github
- helm-release.yaml
- pod-monitor.yaml
- prometheus-rule.yaml
1 change: 1 addition & 0 deletions kubernetes/main/apps/flux-system/kustomization.yaml
@@ -8,6 +8,7 @@ resources:
- ../../templates/alerts
# Flux-Kustomizations
- flux-operator/install.yaml
- flux-config/install.yaml
transformers:
- |-
apiVersion: builtin
6 changes: 3 additions & 3 deletions kubernetes/main/apps/kustomization.yaml.bootstrap
@@ -11,8 +11,8 @@ resources:
- networking
- system
# Step 2
# - rook-ceph
- rook-ceph
# Step 3
# - dbms
- dbms
# Step 4
# - self-hosted
- self-hosted
2 changes: 1 addition & 1 deletion kubernetes/main/apps/media/immich/db/kustomization.yaml
@@ -6,7 +6,7 @@ resources:
- backups-pvc.yaml
- backups-secret.yaml
- cluster.yaml
- helm-release.yaml
# - helm-release.yaml
- postgres-superuser-secret.yaml
- service.yaml
labels:
1 change: 1 addition & 0 deletions kubernetes/main/apps/networking/nginx/install.yaml
@@ -18,6 +18,7 @@ spec:
name: flux-system
dependsOn:
- name: cert-manager
- name: external-secrets
prune: true
wait: false
interval: 30m
27 changes: 0 additions & 27 deletions kubernetes/main/bootstrap/helmfile-cluster.yaml

This file was deleted.

@@ -63,3 +63,22 @@ releases:
- ./templates/wipe-rook.yaml.gotmpl
needs:
- system/spegel

- name: flux-operator
namespace: flux-system
chart: oci://ghcr.io/controlplaneio-fluxcd/charts/flux-operator
version: 0.13.0
values:
- ../apps/flux-system/flux-operator/app/helm-values.yaml
needs:
- kube-system/wipe-rook

- name: flux-instance
namespace: flux-system
chart: oci://ghcr.io/controlplaneio-fluxcd/charts/flux-instance
version: 0.13.0
wait: false
values:
- ../apps/flux-system/flux-operator/instance/helm-values.yaml
needs:
- flux-system/flux-operator
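
These two releases are applied by the deploy task shown earlier; the equivalent direct commands would be (a sketch, assuming the main cluster and that KUBERNETES_DIR resolves to kubernetes/):

helmfile --kube-context main --quiet --file kubernetes/main/bootstrap/helmfile.yaml apply --skip-diff-on-install --suppress-diff
helmfile --kube-context main --quiet --file kubernetes/main/bootstrap/helmfile.yaml destroy --selector name=wipe-rook   # remove the one-shot wipe job afterwards
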
6 changes: 3 additions & 3 deletions kubernetes/main/bootstrap/templates/wipe-rook.yaml.gotmpl
@@ -32,9 +32,9 @@ controllers:
args:
- |
apk add --no-cache findutils nvme-cli;
DISK=$(find /dev/disk/by-id/ -iname "*$(MODEL)*" -not -name "*_[0-9]");
DISK=$(find /dev/disk/by-id/ -iname "*$MODEL*" -not -name "*_[0-9]");
echo "=== Wiping $DISK ===";
nvme format --lbaf=1 $DISK --force;
nvme format --lbaf=0 $DISK --force;
nvme format --block-size=4096 $DISK --force;
securityContext:
privileged: true
@@ -47,7 +47,7 @@ defaultPodOptions:
whenUnsatisfiable: DoNotSchedule
labelSelector:
matchLabels:
app.kubernetes.io/name: wipe-disk-job
app.kubernetes.io/name: wipe-rook
persistence:
host-var:
type: hostPath
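
After the wipe-rook job completes, the reformat can be spot-checked from a privileged shell on the node (for example the ubuntu-shell pod in hack/ubuntu.yaml); a sketch using nvme-cli:

nvme list   # the wiped namespaces should report a 4096-byte LBA format
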