diff --git a/cluster/base/infra/generic-cdi-plugin.yaml b/cluster/base/infra/generic-cdi-plugin.yaml
new file mode 100644
index 0000000..3624f9b
--- /dev/null
+++ b/cluster/base/infra/generic-cdi-plugin.yaml
@@ -0,0 +1,23 @@
+---
+# Flux Kustomization that reconciles the manifests stored under
+# ./infra/controllers/generic-cdi-plugin of the flux-system GitRepository.
+apiVersion: kustomize.toolkit.fluxcd.io/v1
+kind: Kustomization
+metadata:
+  name: infra-generic-cdi-plugin
+  namespace: flux-system
+spec:
+  interval: 1h0m0s
+  path: ./infra/controllers/generic-cdi-plugin
+  # Pruning is on: objects that disappear from the path are deleted from the
+  # cluster. The Node manifest in that path opts out of pruning through the
+  # kustomize.toolkit.fluxcd.io/prune annotation.
+  prune: true
+  retryInterval: 1m0s
+  sourceRef:
+    kind: GitRepository
+    name: flux-system
+  timeout: 5m0s
+  # Reconciliation only succeeds once the applied objects report ready,
+  # bounded by the timeout above.
+  wait: true
diff --git a/infra/controllers/generic-cdi-plugin/generic-cdi-plugin.yaml b/infra/controllers/generic-cdi-plugin/generic-cdi-plugin.yaml
new file mode 100644
index 0000000..2396348
--- /dev/null
+++ b/infra/controllers/generic-cdi-plugin/generic-cdi-plugin.yaml
@@ -0,0 +1,59 @@
+---
+# Namespace that holds the generic-cdi-plugin DaemonSet.
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: generic-cdi-plugin
+---
+# Runs /generic-cdi-plugin on nodes labelled nixos-nvidia-cdi=enabled, handing
+# it the host file /var/run/cdi/nvidia-container-toolkit.json as its argument.
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: generic-cdi-plugin-daemonset
+  namespace: generic-cdi-plugin
+spec:
+  selector:
+    matchLabels:
+      name: generic-cdi-plugin
+  template:
+    metadata:
+      labels:
+        name: generic-cdi-plugin
+        app.kubernetes.io/component: generic-cdi-plugin
+        app.kubernetes.io/name: generic-cdi-plugin
+    spec:
+      containers:
+        # NOTE(review): ":main" is a mutable tag and the pull policy is Always,
+        # so the running image can change on any pod restart; consider pinning
+        # a digest.
+        - image: ghcr.io/olfillasodikno/generic-cdi-plugin:main
+          name: generic-cdi-plugin
+          command:
+            - /generic-cdi-plugin
+            - /var/run/cdi/nvidia-container-toolkit.json
+          imagePullPolicy: Always
+          securityContext:
+            privileged: true
+          tty: true
+          volumeMounts:
+            - name: kubelet
+              mountPath: /var/lib/kubelet
+            - name: nvidia-container-toolkit
+              mountPath: /var/run/cdi/nvidia-container-toolkit.json
+      volumes:
+        - name: kubelet
+          hostPath:
+            path: /var/lib/kubelet
+        - name: nvidia-container-toolkit
+          hostPath:
+            path: /var/run/cdi/nvidia-container-toolkit.json
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  - key: "nixos-nvidia-cdi"
+                    operator: In
+                    values:
+                      - "enabled"
diff --git a/infra/controllers/generic-cdi-plugin/node-penguin.yaml b/infra/controllers/generic-cdi-plugin/node-penguin.yaml
new file mode 100644
index 0000000..55a2c40
--- /dev/null
+++ b/infra/controllers/generic-cdi-plugin/node-penguin.yaml
@@ -0,0 +1,14 @@
+---
+# Labels the node "penguin" so that the generic-cdi-plugin DaemonSet's node
+# affinity (nixos-nvidia-cdi In [enabled]) matches it.
+apiVersion: v1
+kind: Node
+metadata:
+  name: penguin
+  annotations:
+    # This manifest is applied by a Flux Kustomization with prune: true.
+    # Without this opt-out, removing the file or the Kustomization would make
+    # Flux garbage-collect (delete) the Node object itself.
+    kustomize.toolkit.fluxcd.io/prune: disabled
+  labels:
+    nixos-nvidia-cdi: enabled