Add monitoring stack: kube-state-metrics, node-exporter, and Alloy
- New monitoring namespace
- HelmRepositories: prometheus-community, grafana
- kube-state-metrics: Kubernetes state metrics
- prometheus-node-exporter: Host metrics DaemonSet
- Alloy: Multi-target scraper pushing to Selendis (10.0.0.3:9090 Prometheus, 10.0.0.3:3100 Loki)
  - Scrapes Flux controllers (flux-system:8080)
  - Scrapes kube-state-metrics (monitoring:8080)
  - Scrapes node-exporter DaemonSet (monitoring:9100)
  - Scrapes Synapse metrics (matrix:9000)
  - Pushes pod logs to Loki
- Monitoring Kustomization added to flux-system

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
1a1c8ee2c6
commit
34bffe40be
135
apps/monitoring/alloy-config.yaml
Normal file
135
apps/monitoring/alloy-config.yaml
Normal file
@ -0,0 +1,135 @@
|
||||
# ConfigMap carrying the Grafana Alloy pipeline used by the `alloy` HelmRelease.
# The pipeline scrapes Flux, kube-state-metrics, node-exporter and Synapse,
# remote-writes the samples to Prometheus on 10.0.0.3:9090, and ships pod logs
# to Loki on 10.0.0.3:3100.
apiVersion: v1
kind: ConfigMap
metadata:
  name: alloy-config
  namespace: monitoring
data:
  config.alloy: |
    // ------------------------------------------------------------------
    // Discovery
    // ------------------------------------------------------------------

    // Every pod in the cluster. Feeds the annotation-based relabeling and
    // the log collection below.
    discovery.kubernetes "k8s_pods" {
      role = "pod"
    }

    // Annotation-driven target selection (prometheus.io/scrape, /path, /port).
    // NOTE(review): no prometheus.scrape component consumes this output, so
    // annotated pods are currently NOT scraped. Wire it into a scrape job
    // (excluding flux-system to avoid double scraping) or remove it.
    discovery.relabel "prometheus_pods" {
      targets = discovery.kubernetes.k8s_pods.targets

      // Keep only pods that opted in via prometheus.io/scrape: "true".
      rule {
        source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_scrape"]
        action        = "keep"
        regex         = "true"
      }

      // Honour a custom metrics path when the annotation is present.
      rule {
        source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_path"]
        action        = "replace"
        target_label  = "__metrics_path__"
        regex         = "(.+)"
      }

      // Rewrite the target address to use the annotated port.
      rule {
        source_labels = ["__address__", "__meta_kubernetes_pod_annotation_prometheus_io_port"]
        action        = "replace"
        regex         = "([^:]+)(?::\\d+)?;(\\d+)"
        replacement   = "$1:$2"
        target_label  = "__address__"
      }

      rule {
        source_labels = ["__meta_kubernetes_namespace"]
        action        = "replace"
        target_label  = "namespace"
      }

      rule {
        source_labels = ["__meta_kubernetes_pod_name"]
        action        = "replace"
        target_label  = "pod"
      }
    }

    // Flux controllers (flux-system namespace, port 8080).
    discovery.kubernetes "flux_pods" {
      role = "pod"

      namespaces {
        names = ["flux-system"]
      }
    }

    discovery.relabel "flux_scrape" {
      targets = discovery.kubernetes.flux_pods.targets

      // Discovery yields one target per declared container port; keep the
      // metrics port only.
      rule {
        source_labels = ["__meta_kubernetes_pod_container_port_number"]
        action        = "keep"
        regex         = "8080"
      }

      rule {
        source_labels = ["__meta_kubernetes_namespace"]
        action        = "replace"
        target_label  = "namespace"
      }

      rule {
        source_labels = ["__meta_kubernetes_pod_name"]
        action        = "replace"
        target_label  = "pod"
      }
    }

    // node-exporter runs as a DaemonSet. Scraping its ClusterIP Service would
    // hit one arbitrary pod per scrape and collapse all nodes into a single
    // series set, so every pod is discovered and scraped individually.
    discovery.kubernetes "node_exporter_pods" {
      role = "pod"

      namespaces {
        names = ["monitoring"]
      }
    }

    discovery.relabel "node_exporter_scrape" {
      targets = discovery.kubernetes.node_exporter_pods.targets

      // Select the node-exporter pods by the chart's standard name label.
      rule {
        source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name"]
        action        = "keep"
        regex         = "prometheus-node-exporter"
      }

      // Keep the metrics port only.
      rule {
        source_labels = ["__meta_kubernetes_pod_container_port_number"]
        action        = "keep"
        regex         = "9100"
      }

      rule {
        source_labels = ["__meta_kubernetes_namespace"]
        action        = "replace"
        target_label  = "namespace"
      }

      rule {
        source_labels = ["__meta_kubernetes_pod_name"]
        action        = "replace"
        target_label  = "pod"
      }

      // Identify which host the samples belong to.
      rule {
        source_labels = ["__meta_kubernetes_pod_node_name"]
        action        = "replace"
        target_label  = "node"
      }
    }

    // Labels attached to every pod log stream. Without this stage the
    // __meta_* labels are dropped and streams cannot be filtered by
    // namespace, pod or container.
    discovery.relabel "pod_logs" {
      targets = discovery.kubernetes.k8s_pods.targets

      rule {
        source_labels = ["__meta_kubernetes_namespace"]
        action        = "replace"
        target_label  = "namespace"
      }

      rule {
        source_labels = ["__meta_kubernetes_pod_name"]
        action        = "replace"
        target_label  = "pod"
      }

      rule {
        source_labels = ["__meta_kubernetes_pod_container_name"]
        action        = "replace"
        target_label  = "container"
      }
    }

    // ------------------------------------------------------------------
    // Metrics scraping
    // ------------------------------------------------------------------

    // kube-state-metrics (single replica, reached through its Service).
    prometheus.scrape "kube_state_metrics" {
      targets = [{
        __address__ = "kube-state-metrics.monitoring.svc.cluster.local:8080",
      }]
      forward_to      = [prometheus.remote_write.selendis.receiver]
      scrape_interval = "30s"
      scrape_timeout  = "10s"
      job_name        = "kube-state-metrics"
    }

    // Flux controllers.
    prometheus.scrape "flux" {
      targets         = discovery.relabel.flux_scrape.output
      forward_to      = [prometheus.remote_write.selendis.receiver]
      scrape_interval = "30s"
      scrape_timeout  = "10s"
      job_name        = "flux"
    }

    // node-exporter, one target per node.
    prometheus.scrape "node_exporter" {
      targets         = discovery.relabel.node_exporter_scrape.output
      forward_to      = [prometheus.remote_write.selendis.receiver]
      scrape_interval = "30s"
      scrape_timeout  = "10s"
      job_name        = "node-exporter"
    }

    // Synapse metrics listener.
    prometheus.scrape "synapse" {
      targets = [{
        __address__ = "matrix-stack-synapse-main.matrix.svc.cluster.local:9000",
      }]
      forward_to      = [prometheus.remote_write.selendis.receiver]
      scrape_interval = "30s"
      scrape_timeout  = "10s"
      job_name        = "synapse"
    }

    // ------------------------------------------------------------------
    // Logs
    // ------------------------------------------------------------------

    // Tail pod logs through the Kubernetes API and forward them to Loki.
    loki.source.kubernetes "k8s_logs" {
      targets    = discovery.relabel.pod_logs.output
      forward_to = [loki.write.selendis.receiver]
    }

    // ------------------------------------------------------------------
    // Outputs
    // ------------------------------------------------------------------

    // Remote write to Selendis Prometheus.
    prometheus.remote_write "selendis" {
      endpoint {
        url = "http://10.0.0.3:9090/api/v1/write"

        // Drop Go runtime and process self-metrics to reduce series volume.
        write_relabel_config {
          source_labels = ["__name__"]
          regex         = "go_.*|process_.*"
          action        = "drop"
        }
      }
    }

    // Push logs to Selendis Loki.
    loki.write "selendis" {
      endpoint {
        url = "http://10.0.0.3:3100/loki/api/v1/push"
      }
    }
|
||||
33
apps/monitoring/alloy.yaml
Normal file
33
apps/monitoring/alloy.yaml
Normal file
@ -0,0 +1,33 @@
|
||||
# Flux HelmRelease that installs Grafana Alloy and points it at the
# pipeline stored in the `alloy-config` ConfigMap.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: alloy
  namespace: monitoring
spec:
  interval: 1h
  chart:
    spec:
      chart: alloy
      version: "0.x"
      sourceRef:
        kind: HelmRepository
        name: grafana
        namespace: flux-system
  values:
    alloy:
      configMap:
        # The chart renders its own ConfigMap unless `create` is false; only
        # then is `name` treated as an existing ConfigMap to mount.
        create: false
        name: alloy-config
        key: config.alloy
      # Container resources live under `alloy.resources` in this chart.
      resources:
        limits:
          memory: 512Mi
        requests:
          cpu: 100m
          memory: 256Mi
    controller:
      # A single Deployment replica: the pipeline scrapes fixed targets and
      # tails logs through the Kubernetes API, so the chart's default
      # DaemonSet would duplicate every sample and log line once per node.
      type: deployment
      replicas: 1
      podAnnotations:
        prometheus.io/scrape: "false"
    serviceAccount:
      create: true
      name: alloy
    rbac:
      create: true
|
||||
18
apps/monitoring/helm-repos.yaml
Normal file
18
apps/monitoring/helm-repos.yaml
Normal file
@ -0,0 +1,18 @@
|
||||
# Helm repository providing the kube-state-metrics and
# prometheus-node-exporter charts.
# Uses the GA source API (v1), matching the GA helm.toolkit.fluxcd.io/v2
# HelmReleases that reference it; v1beta2 is deprecated.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: prometheus-community
  namespace: flux-system
spec:
  interval: 1h
  url: https://prometheus-community.github.io/helm-charts
|
||||
|
||||
---
|
||||
# Helm repository providing the Grafana Alloy chart.
# Uses the GA source API (v1), matching the GA helm.toolkit.fluxcd.io/v2
# HelmReleases that reference it; v1beta2 is deprecated.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: grafana
  namespace: flux-system
spec:
  interval: 1h
  url: https://grafana.github.io/helm-charts
|
||||
22
apps/monitoring/kube-state-metrics.yaml
Normal file
22
apps/monitoring/kube-state-metrics.yaml
Normal file
@ -0,0 +1,22 @@
|
||||
# Flux HelmRelease for kube-state-metrics (Kubernetes object state metrics).
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kube-state-metrics
  namespace: monitoring
spec:
  interval: 1h
  chart:
    spec:
      chart: kube-state-metrics
      version: "5.x"
      sourceRef:
        kind: HelmRepository
        name: prometheus-community
        namespace: flux-system
  values:
    replicas: 1
    # Port scraped by Alloy via the Service DNS name.
    service:
      port: 8080
    # No ServiceMonitor: scraping is configured in the Alloy pipeline.
    prometheus:
      monitor:
        enabled: false
|
||||
9
apps/monitoring/kustomization.yaml
Normal file
9
apps/monitoring/kustomization.yaml
Normal file
@ -0,0 +1,9 @@
|
||||
# Kustomize entry point for the monitoring stack; lists every manifest in
# apps/monitoring.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  # Namespace and chart sources
  - namespace.yaml
  - helm-repos.yaml
  # Exporters
  - kube-state-metrics.yaml
  - node-exporter.yaml
  # Collector (pipeline config and release)
  - alloy-config.yaml
  - alloy.yaml
|
||||
4
apps/monitoring/namespace.yaml
Normal file
4
apps/monitoring/namespace.yaml
Normal file
@ -0,0 +1,4 @@
|
||||
# Namespace hosting the monitoring HelmReleases and the Alloy ConfigMap.
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring
|
||||
29
apps/monitoring/node-exporter.yaml
Normal file
29
apps/monitoring/node-exporter.yaml
Normal file
@ -0,0 +1,29 @@
|
||||
# Flux HelmRelease for prometheus-node-exporter (host metrics DaemonSet).
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: prometheus-node-exporter
  namespace: monitoring
spec:
  interval: 1h
  chart:
    spec:
      chart: prometheus-node-exporter
      version: "4.x"
      sourceRef:
        kind: HelmRepository
        name: prometheus-community
        namespace: flux-system
  values:
    # Host-level access settings for the exporter pods.
    hostNetwork: true
    hostPID: true
    hostRootFsMount:
      enabled: true
    service:
      port: 9100
      targetPort: 9100
    # No ServiceMonitor: scraping is configured in the Alloy pipeline.
    prometheus:
      monitor:
        enabled: false
    # Tolerate every NoSchedule taint.
    tolerations:
      - effect: NoSchedule
        operator: Exists
|
||||
@ -4,4 +4,5 @@ resources:
|
||||
- gotk-components.yaml
|
||||
- gotk-sync.yaml
|
||||
- infra-sync.yaml
|
||||
- monitoring-sync.yaml
|
||||
- production-sync.yaml
|
||||
|
||||
14
clusters/matrix/flux-system/monitoring-sync.yaml
Normal file
14
clusters/matrix/flux-system/monitoring-sync.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
# Flux Kustomization that reconciles ./apps/monitoring from the flux-system
# GitRepository once the infra-apps Kustomization is ready.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: monitoring-apps
  namespace: flux-system
spec:
  dependsOn:
    - name: infra-apps
  sourceRef:
    kind: GitRepository
    name: flux-system
  path: ./apps/monitoring
  interval: 10m
  # Delete cluster objects whose manifests are removed from Git.
  prune: true
|
||||
Loading…
x
Reference in New Issue
Block a user