Add monitoring stack: kube-state-metrics, node-exporter, and Alloy

- New monitoring namespace
- HelmRepositories: prometheus-community, grafana
- kube-state-metrics: Kubernetes state metrics
- prometheus-node-exporter: Host metrics DaemonSet
- Alloy: Multi-target scraper pushing to Selendis (10.0.0.3:9090 Prometheus, 10.0.0.3:3100 Loki)
  - Scrapes Flux controllers (flux-system:8080)
  - Scrapes kube-state-metrics (monitoring:8080)
  - Scrapes node-exporter DaemonSet (monitoring:9100)
  - Scrapes Synapse metrics (matrix:9000)
  - Pushes pod logs to Loki
- Monitoring Kustomization added to flux-system

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
Scrublord MacBad 2026-04-24 16:14:17 +02:00
parent 1a1c8ee2c6
commit 34bffe40be
9 changed files with 265 additions and 0 deletions

View File

@ -0,0 +1,135 @@
---
# Grafana Alloy pipeline for the monitoring namespace.
# Scrapes Flux, kube-state-metrics, node-exporter and Synapse, remote-writes
# the metrics to Prometheus at 10.0.0.3:9090 and ships pod logs to Loki at
# 10.0.0.3:3100.
apiVersion: v1
kind: ConfigMap
metadata:
  name: alloy-config
  namespace: monitoring
data:
  config.alloy: |
    // ---------------------------------------------------------------------
    // Discovery
    // ---------------------------------------------------------------------

    // All pods in the cluster. Used for log collection and as the input of
    // the annotation-based relabel below.
    discovery.kubernetes "k8s_pods" {
      role = "pod"
    }

    // Annotation-driven targets (prometheus.io/scrape, /path, /port).
    // NOTE(review): no prometheus.scrape component consumes this output yet;
    // wire it up or drop it once the intent is confirmed.
    discovery.relabel "prometheus_pods" {
      targets = discovery.kubernetes.k8s_pods.targets

      // Only keep pods that opt in via prometheus.io/scrape: "true".
      rule {
        source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_scrape"]
        action        = "keep"
        regex         = "true"
      }

      // Honour a custom metrics path when the annotation is non-empty.
      rule {
        source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_path"]
        action        = "replace"
        target_label  = "__metrics_path__"
        regex         = "(.+)"
      }

      // Rewrite host:port so the port comes from prometheus.io/port.
      rule {
        source_labels = ["__address__", "__meta_kubernetes_pod_annotation_prometheus_io_port"]
        action        = "replace"
        regex         = "([^:]+)(?::\\d+)?;(\\d+)"
        replacement   = "$1:$2"
        target_label  = "__address__"
      }

      rule {
        source_labels = ["__meta_kubernetes_namespace"]
        action        = "replace"
        target_label  = "namespace"
      }

      rule {
        source_labels = ["__meta_kubernetes_pod_name"]
        action        = "replace"
        target_label  = "pod"
      }
    }

    // Flux controllers (flux-system namespace, port 8080).
    discovery.kubernetes "flux_pods" {
      role = "pod"

      namespaces {
        names = ["flux-system"]
      }
    }

    discovery.relabel "flux_scrape" {
      targets = discovery.kubernetes.flux_pods.targets

      // Pod discovery yields one target per declared container port; keep
      // only the 8080 one.
      rule {
        source_labels = ["__meta_kubernetes_pod_container_port_number"]
        action        = "keep"
        regex         = "8080"
      }

      rule {
        source_labels = ["__meta_kubernetes_namespace"]
        action        = "replace"
        target_label  = "namespace"
      }

      rule {
        source_labels = ["__meta_kubernetes_pod_name"]
        action        = "replace"
        target_label  = "pod"
      }
    }

    // node-exporter runs as a DaemonSet. Scraping the Service DNS name would
    // be load-balanced to one arbitrary pod per scrape, so every node's
    // series would be mixed under a single instance. Discover each pod
    // instead so every node gets its own target.
    discovery.kubernetes "node_exporter_pods" {
      role = "pod"

      namespaces {
        names = ["monitoring"]
      }

      selectors {
        role  = "pod"
        label = "app.kubernetes.io/name=prometheus-node-exporter"
      }
    }

    discovery.relabel "node_exporter_scrape" {
      targets = discovery.kubernetes.node_exporter_pods.targets

      rule {
        source_labels = ["__meta_kubernetes_pod_container_port_number"]
        action        = "keep"
        regex         = "9100"
      }

      rule {
        source_labels = ["__meta_kubernetes_namespace"]
        action        = "replace"
        target_label  = "namespace"
      }

      rule {
        source_labels = ["__meta_kubernetes_pod_name"]
        action        = "replace"
        target_label  = "pod"
      }

      // Expose the node name so host metrics can be grouped per node.
      rule {
        source_labels = ["__meta_kubernetes_pod_node_name"]
        action        = "replace"
        target_label  = "node"
      }
    }

    // ---------------------------------------------------------------------
    // Metrics scraping
    // ---------------------------------------------------------------------

    // kube-state-metrics via its Service.
    prometheus.scrape "kube_state_metrics" {
      targets = [{
        __address__ = "kube-state-metrics.monitoring.svc.cluster.local:8080",
      }]
      forward_to      = [prometheus.remote_write.selendis.receiver]
      scrape_interval = "30s"
      scrape_timeout  = "10s"
      job_name        = "kube-state-metrics"
    }

    // Flux controllers.
    prometheus.scrape "flux" {
      targets         = discovery.relabel.flux_scrape.output
      forward_to      = [prometheus.remote_write.selendis.receiver]
      scrape_interval = "30s"
      scrape_timeout  = "10s"
      job_name        = "flux"
    }

    // node-exporter, one target per DaemonSet pod.
    prometheus.scrape "node_exporter" {
      targets         = discovery.relabel.node_exporter_scrape.output
      forward_to      = [prometheus.remote_write.selendis.receiver]
      scrape_interval = "30s"
      scrape_timeout  = "10s"
      job_name        = "node-exporter"
    }

    // Synapse metrics listener in the matrix namespace.
    prometheus.scrape "synapse" {
      targets = [{
        __address__ = "matrix-stack-synapse-main.matrix.svc.cluster.local:9000",
      }]
      forward_to      = [prometheus.remote_write.selendis.receiver]
      scrape_interval = "30s"
      scrape_timeout  = "10s"
      job_name        = "synapse"
    }

    // ---------------------------------------------------------------------
    // Logs
    // ---------------------------------------------------------------------

    // Tail logs of every discovered pod and forward them to Loki.
    loki.source.kubernetes "k8s_logs" {
      targets    = discovery.kubernetes.k8s_pods.targets
      forward_to = [loki.write.selendis.receiver]
    }

    // ---------------------------------------------------------------------
    // Sinks
    // ---------------------------------------------------------------------

    // Remote write to Selendis Prometheus.
    prometheus.remote_write "selendis" {
      endpoint {
        url = "http://10.0.0.3:9090/api/v1/write"

        // Drop Go runtime and process metrics before they leave the cluster.
        write_relabel_config {
          source_labels = ["__name__"]
          regex         = "go_.*|process_.*"
          action        = "drop"
        }
      }
    }

    // Push logs to Selendis Loki.
    loki.write "selendis" {
      endpoint {
        url = "http://10.0.0.3:3100/loki/api/v1/push"
      }
    }

View File

@ -0,0 +1,33 @@
---
# Grafana Alloy, deployed from the grafana Helm repository.
# The pipeline configuration is not rendered by the chart; it comes from the
# separately managed `alloy-config` ConfigMap in this namespace.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: alloy
  namespace: monitoring
spec:
  interval: 1h
  chart:
    spec:
      chart: alloy
      version: "0.x"
      sourceRef:
        kind: HelmRepository
        name: grafana
        namespace: flux-system
  values:
    alloy:
      configMap:
        # The ConfigMap is managed in Git. Without create: false the chart
        # renders its own ConfigMap under the same name and collides with it.
        create: false
        name: alloy-config
        key: config.alloy
      # Container resources are read from alloy.resources by the chart.
      resources:
        limits:
          memory: 512Mi
        requests:
          cpu: 100m
          memory: 256Mi
    controller:
      # The chart defaults to a DaemonSet. This pipeline discovers targets
      # cluster-wide, so one instance per node would scrape and tail
      # everything once per node. Run a single Deployment replica instead.
      type: deployment
      replicas: 1
      podAnnotations:
        prometheus.io/scrape: "false"
    serviceAccount:
      create: true
      name: alloy
    rbac:
      create: true

View File

@ -0,0 +1,18 @@
---
# Helm chart sources used by the monitoring stack.
# source.toolkit.fluxcd.io/v1 is the GA HelmRepository API; v1beta2 is
# deprecated. The HelmReleases in this stack already use the GA
# helm.toolkit.fluxcd.io/v2 API.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: prometheus-community
  namespace: flux-system
spec:
  interval: 1h
  url: https://prometheus-community.github.io/helm-charts
---
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: grafana
  namespace: flux-system
spec:
  interval: 1h
  url: https://grafana.github.io/helm-charts

View File

@ -0,0 +1,22 @@
---
# kube-state-metrics: exposes Kubernetes object state as metrics on port 8080.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kube-state-metrics
  namespace: monitoring
spec:
  interval: 1h
  chart:
    spec:
      chart: kube-state-metrics
      version: '5.x'
      sourceRef:
        kind: HelmRepository
        name: prometheus-community
        namespace: flux-system
  values:
    replicas: 1
    service:
      port: 8080
    # The chart's Prometheus monitor resource is switched off.
    prometheus:
      monitor:
        enabled: false

View File

@ -0,0 +1,9 @@
---
# Kustomize entry point for the monitoring stack.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  # Namespace and chart sources
  - namespace.yaml
  - helm-repos.yaml
  # Exporters
  - kube-state-metrics.yaml
  - node-exporter.yaml
  # Alloy configuration and release
  - alloy-config.yaml
  - alloy.yaml

View File

@ -0,0 +1,4 @@
---
# Namespace holding the monitoring components.
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring

View File

@ -0,0 +1,29 @@
---
# prometheus-node-exporter: host-level metrics on port 9100.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: prometheus-node-exporter
  namespace: monitoring
spec:
  interval: 1h
  chart:
    spec:
      chart: prometheus-node-exporter
      version: '4.x'
      sourceRef:
        kind: HelmRepository
        name: prometheus-community
        namespace: flux-system
  values:
    # Host network, host PID namespace and host root filesystem mount are
    # all enabled for the exporter.
    hostNetwork: true
    hostPID: true
    hostRootFsMount:
      enabled: true
    service:
      port: 9100
      targetPort: 9100
    # The chart's Prometheus monitor resource is switched off.
    prometheus:
      monitor:
        enabled: false
    # Tolerate every NoSchedule taint.
    tolerations:
      - effect: NoSchedule
        operator: Exists

View File

@ -4,4 +4,5 @@ resources:
- gotk-components.yaml - gotk-components.yaml
- gotk-sync.yaml - gotk-sync.yaml
- infra-sync.yaml - infra-sync.yaml
- monitoring-sync.yaml
- production-sync.yaml - production-sync.yaml

View File

@ -0,0 +1,14 @@
---
# Flux Kustomization that reconciles ./apps/monitoring from the flux-system
# GitRepository every 10 minutes, after infra-apps.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: monitoring-apps
  namespace: flux-system
spec:
  interval: 10m
  path: ./apps/monitoring
  # Resources removed from Git are deleted from the cluster.
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
  dependsOn:
    - name: infra-apps