diff --git a/clanServices/monitoring/prometheus-consoles/index.html b/clanServices/monitoring/prometheus-consoles/index.html new file mode 100644 index 000000000..18c68aced --- /dev/null +++ b/clanServices/monitoring/prometheus-consoles/index.html @@ -0,0 +1,222 @@ + + + + + Clan status + + + + + +
+

System Overview

+ + +

Instance Status

+ + + + + + + + + + + + {{/* One row per "up" series, ordered by the instance label. Per-host queries interpolate the label with %q so quotes or backslashes in it stay escaped inside the PromQL string; a cell shows N/A when its query returns no series. */}}{{ range query "up" | sortByLabel "instance" }} + + + + + + + + {{ end }} + +
HostStatusCPU UsageMemory UsageDisk Usage
{{ .Labels.instance }} + {{ if eq .Value 1.0 }} + UP + {{ else }} + DOWN + {{ end }} + + {{ $cpuQuery := query (printf "100 - (avg by(host) (rate(cpu_seconds_total{mode=\"idle\",host=%q}[5m])) * 100)" .Labels.instance) }} + {{ if $cpuQuery }} + {{ with $cpuQuery | first }} + {{ . | value | printf "%.1f" }}% + {{ end }} + {{ else }} + N/A + {{ end }} + + {{ $memQuery := query (printf "(1 - (mem_available_bytes{host=%q} / mem_total_bytes{host=%q})) * 100" .Labels.instance .Labels.instance) }} + {{ if $memQuery }} + {{ with $memQuery | first }} + {{ . | value | printf "%.1f" }}% + {{ end }} + {{ else }} + N/A + {{ end }} + + {{ $diskQuery := query (printf "(1 - (disk_free_bytes{host=%q,path=\"/\"} / disk_total_bytes{host=%q,path=\"/\"})) * 100" .Labels.instance .Labels.instance) }} + {{ if $diskQuery }} + {{ with $diskQuery | first }} + {{ . | value | printf "%.1f" }}% + {{ end }} + {{ else }} + N/A + {{ end }} +
+ +
+
+

Top 5 CPU Usage

+ + + + + + + + + {{/* Rows: the five hosts with the highest CPU usage, computed as 100 minus the per-host average 5m idle rate times 100; each row prints the host label and the value with one decimal. */}}{{ range query "topk(5, 100 - (avg by(host) (rate(cpu_seconds_total{mode=\"idle\"}[5m])) * 100))" }} + + + + + {{ end }} + +
HostCPU %
{{ .Labels.host }}{{ .Value | printf "%.1f" }}%
+
+ +
+

Top 5 Memory Usage

+ + + + + + + + + {{/* Rows: the five series with the highest memory usage, computed as (1 - available/total) * 100; each row prints the host label and the value with one decimal. */}}{{ range query "topk(5, (1 - (mem_available_bytes / mem_total_bytes)) * 100)" }} + + + + + {{ end }} + +
HostMemory %
{{ .Labels.host }}{{ .Value | printf "%.1f" }}%
+
+
+ +

Active Alerts

+ {{/* Firing alerts: when the ALERTS query returns series, render one row per alert (the host column falls back to the instance label when host is empty); otherwise the else branch shows the "No active alerts" notice. */}}{{ with query "ALERTS{alertstate=\"firing\"}" }} + + + + + + + + + + + {{ range . }} + + + + + + + {{ end }} + +
AlertHostSeverityValue
{{ .Labels.alertname }}{{ or .Labels.host .Labels.instance }}{{ .Labels.severity }}{{ .Value }}
+ {{ else }} +
No active alerts
+ {{ end }} +
+ + diff --git a/clanServices/monitoring/prometheus-consoles/style.css b/clanServices/monitoring/prometheus-consoles/style.css new file mode 100644 index 000000000..0e5dbdf61 --- /dev/null +++ b/clanServices/monitoring/prometheus-consoles/style.css @@ -0,0 +1,80 @@ + :root { /* two-colour palette, reused through var() in the rules below */ + --dark: rgb(22, 35, 36); + --light: rgb(229, 231, 235); +} + +body { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; + margin: 0; + padding: 20px; + background: var(--dark); +} +.container { + max-width: 1400px; + margin: 0 auto; + background: var(--light); + padding: 30px; + border-radius: 8px; + box-shadow: 0 1px 3px rgba(0,0,0,0.1); +} +h1 { + margin-top: 0; + color: #333; + border-bottom: 2px solid var(--dark); + padding-bottom: 10px; +} +h2 { + color: #555; + margin-top: 30px; +} +table { + width: 100%; + border-collapse: collapse; + margin: 20px 0; +} +th { + background: var(--dark); + color: var(--light); + padding: 12px; + text-align: left; + font-weight: 600; +} +td { + padding: 10px 12px; + border-bottom: 1px solid #ddd; +} +tr:hover { + background: rgba(0, 0, 0, 0.05); /* was var(--light), which is also the .container background and so gave no visible highlight */ +} +.status-up { + color: #28a745; + font-weight: bold; +} +.status-down { + color: #dc3545; + font-weight: bold; +} +.alert-success { + background: #d4edda; + color: #155724; + padding: 12px; + border-radius: 4px; + border: 1px solid #c3e6cb; +} +.grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(400px, 1fr)); + gap: 20px; + margin: 20px 0; +} +.card { + border: 1px solid #ddd; + border-radius: 4px; + padding: 15px; +} +.metric-value { + font-size: 1.2em; + font-weight: bold; + color: var(--dark); +} + diff --git a/clanServices/monitoring/prometheus.nix b/clanServices/monitoring/prometheus.nix index 780d436ae..aa8bb7efd 100644 --- a/clanServices/monitoring/prometheus.nix +++ b/clanServices/monitoring/prometheus.nix @@ -12,7 +12,7 @@ { config, lib, - # pkgs, + pkgs, ...
}: { @@ -33,8 +33,12 @@ # "https://prometheus.${config.clan.core.settings.tld}"; webExternalUrl = settings.webExternalUrl; - extraFlags = [ "--storage.tsdb.retention.time=30d" ]; - + # Keep 30 days of TSDB data; console templates and libraries are both served from ./prometheus-consoles + extraFlags = [ + "--storage.tsdb.retention.time=30d" + "--web.console.templates=${./prometheus-consoles}" + "--web.console.libraries=${./prometheus-consoles}" + ]; scrapeConfigs = [ { job_name = "telegraf"; diff --git a/clanServices/monitoring/telegraf.nix b/clanServices/monitoring/telegraf.nix index df36dd411..5aa2d8d9a 100644 --- a/clanServices/monitoring/telegraf.nix +++ b/clanServices/monitoring/telegraf.nix @@ -21,7 +21,12 @@ agent.interval = "60s"; inputs = { + # More input plugins available at: + # https://github.com/influxdata/telegraf/tree/master/plugins/inputs diskio = { }; + disk = { }; + cpu = { }; + processes = { }; kernel_vmstat = { }; system = { }; mem = { };