fix template

This commit is contained in:
pinpox
2025-10-28 01:01:03 +01:00
parent 98cfaac849
commit fabbfcaab6
3 changed files with 49 additions and 12 deletions

View File

@@ -0,0 +1,24 @@
# Prometheus alerting rules, written as a compact attribute set and expanded
# into the list-of-records form used for the `rules` field of a rule group.
#
# Argument:
#   lib - provides `mapAttrsToList`.
#
# Result: a list with one rule record per attribute of `alertDefinitions`.
{ lib }:
let
  # Build a single rule record from an alert's attribute name and its options.
  #   condition   - expression placed in `expr` (required)
  #   description - text placed in `annotations.description` (required)
  #   time        - value for `for`; falls back to "2m" when not given
  mkRule = alertName: cfg: {
    alert = alertName;
    expr = cfg.condition;
    for = cfg.time or "2m";
    labels = { };
    annotations = {
      description = cfg.description;
    };
  };

  # Alert table: the attribute name becomes the alert name.
  alertDefinitions = {
    # TODO Remove this alert, just for testing
    "Filesystem > = 10%" = {
      condition = ''disk_used_percent{fstype!~"tmpfs|vfat|devtmpfs|efivarfs"} > 10'';
      time = "1m";
      description = "{{$labels.instance}} device {{$labels.device}} on {{$labels.path}} got less than 90% space left on its filesystem.";
    };

    filesystem_full_80percent = {
      condition = ''disk_used_percent{fstype!~"tmpfs|vfat|devtmpfs|efivarfs"} > 80'';
      time = "1m";
      description = "{{$labels.instance}} device {{$labels.device}} on {{$labels.path}} got less than 20% space left on its filesystem.";
    };
  };
in
lib.mapAttrsToList mkRule alertDefinitions

View File

@@ -90,10 +90,10 @@ tr:hover {
</head>
<body>
<div class="container">
<h1>System Overview</h1>
<h1>Clan Status</h1>
<h2>Instance Status</h2>
<h2>Instances</h2>
<table>
<thead>
<tr>
@@ -106,8 +106,9 @@ tr:hover {
</thead>
<tbody>
{{ range query "up" | sortByLabel "instance" }}
{{ $hostname := reReplaceAll "\\..*" "" .Labels.instance }}
<tr>
<td>{{ .Labels.instance }}</td>
<td>{{ $hostname }}</td>
<td>
{{ if eq .Value 1.0 }}
<span class="status-up">UP</span>
@@ -116,7 +117,7 @@ tr:hover {
{{ end }}
</td>
<td>
{{ $cpuQuery := query (printf "100 - (avg by(host) (rate(cpu_seconds_total{mode=\"idle\",host=\"%s\"}[5m])) * 100)" .Labels.instance) }}
{{ $cpuQuery := query (printf "100 - cpu_usage_idle{cpu=\"cpu-total\",host=\"%s\"}" $hostname) }}
{{ if $cpuQuery }}
{{ with $cpuQuery | first }}
<span class="metric-value">{{ . | value | printf "%.1f" }}%</span>
@@ -126,7 +127,7 @@ tr:hover {
{{ end }}
</td>
<td>
{{ $memQuery := query (printf "(1 - (mem_available_bytes{host=\"%s\"} / mem_total_bytes{host=\"%s\"})) * 100" .Labels.instance .Labels.instance) }}
{{ $memQuery := query (printf "(1 - (mem_available{host=\"%s\"} / mem_total{host=\"%s\"})) * 100" $hostname $hostname) }}
{{ if $memQuery }}
{{ with $memQuery | first }}
<span class="metric-value">{{ . | value | printf "%.1f" }}%</span>
@@ -136,7 +137,7 @@ tr:hover {
{{ end }}
</td>
<td>
{{ $diskQuery := query (printf "(1 - (disk_free_bytes{host=\"%s\",path=\"/\"} / disk_total_bytes{host=\"%s\",path=\"/\"})) * 100" .Labels.instance .Labels.instance) }}
{{ $diskQuery := query (printf "(1 - (disk_free{host=\"%s\",path=\"/\"} / disk_total{host=\"%s\",path=\"/\"})) * 100" $hostname $hostname) }}
{{ if $diskQuery }}
{{ with $diskQuery | first }}
<span class="metric-value">{{ . | value | printf "%.1f" }}%</span>
@@ -161,7 +162,7 @@ tr:hover {
</tr>
</thead>
<tbody>
{{ range query "topk(5, 100 - (avg by(host) (rate(cpu_seconds_total{mode=\"idle\"}[5m])) * 100))" }}
{{ range query "topk(5, 100 - cpu_usage_idle{cpu=\"cpu-total\"})" }}
<tr>
<td>{{ .Labels.host }}</td>
<td><span class="metric-value">{{ .Value | printf "%.1f" }}%</span></td>
@@ -181,7 +182,7 @@ tr:hover {
</tr>
</thead>
<tbody>
{{ range query "topk(5, (1 - (mem_available_bytes / mem_total_bytes)) * 100)" }}
{{ range query "topk(5, (1 - (mem_available / mem_total)) * 100)" }}
<tr>
<td>{{ .Labels.host }}</td>
<td><span class="metric-value">{{ .Value | printf "%.1f" }}%</span></td>
@@ -197,18 +198,16 @@ tr:hover {
<table>
<thead>
<tr>
<th>Alert</th>
<th>Host</th>
<th>Severity</th>
<th>Alert</th>
<th>Value</th>
</tr>
</thead>
<tbody>
{{ range . }}
<tr>
<td>{{ .Labels.alertname }}</td>
<td>{{ or .Labels.host .Labels.instance }}</td>
<td>{{ .Labels.severity }}</td>
<td>{{ .Labels.alertname }}</td>
<td>{{ .Value }}</td>
</tr>
{{ end }}

View File

@@ -39,6 +39,20 @@
"--web.console.templates=${./prometheus-consoles}"
"--web.console.libraries=${./prometheus-consoles}"
];
# Rule files: a single generated file holding one rule group named
# "alerting-rules". The group's rules are whatever ./alert-rules.nix
# evaluates to when called with `lib`.
ruleFiles = [
# NOTE(review): the content is produced with builtins.toJSON while the file
# name ends in .yml — presumably relying on JSON being valid YAML; confirm
# the consumer accepts it.
(pkgs.writeText "prometheus-rules.yml" (
builtins.toJSON {
groups = [
{
name = "alerting-rules";
rules = import ./alert-rules.nix { inherit lib; };
}
];
}
))
];
scrapeConfigs = [
{
job_name = "telegraf";