Add monitoring stack: kube-state-metrics, node-exporter, and Alloy
- New monitoring namespace
- HelmRepositories: prometheus-community, grafana
- kube-state-metrics: Kubernetes state metrics
- prometheus-node-exporter: Host metrics DaemonSet
- Alloy: Multi-target scraper pushing to Selendis (10.0.0.3:9090 Prometheus, 10.0.0.3:3100 Loki)
- Scrapes Flux controllers (flux-system:8080)
- Scrapes kube-state-metrics (monitoring:8080)
- Scrapes node-exporter DaemonSet (monitoring:9100)
- Scrapes Synapse metrics (matrix:9000)
- Pushes pod logs to Loki
- Monitoring Kustomization added to flux-system

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
1a1c8ee2c6
commit
34bffe40be
135
apps/monitoring/alloy-config.yaml
Normal file
135
apps/monitoring/alloy-config.yaml
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
---
# ConfigMap carrying the Grafana Alloy pipeline under the `config.alloy` key.
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: monitoring
  name: alloy-config
data:
  # Literal block scalar: the Alloy configuration follows verbatim.
  config.alloy: |
|
||||||
|
// Discover pods through the Kubernetes API. No namespace filter is set here;
// the result feeds the relabel stage and the pod-log source in this config.
discovery.kubernetes "k8s_pods" {
  role = "pod"
}
|
||||||
|
|
||||||
|
// Annotation-driven pod targets: keep only pods annotated with
// prometheus.io/scrape=true, then apply the prometheus.io/path and
// prometheus.io/port annotations to the metrics path and scrape address.
// NOTE(review): no prometheus.scrape component in this config reads
// discovery.relabel.prometheus_pods.output; confirm whether one is missing.
discovery.relabel "prometheus_pods" {
  targets = discovery.kubernetes.k8s_pods.targets

  // Opt-in filter: drop every pod whose scrape annotation is not "true".
  rule {
    action        = "keep"
    source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_scrape"]
    regex         = "true"
  }

  // Override the metrics path only when the annotation is non-empty.
  rule {
    action        = "replace"
    source_labels = ["__meta_kubernetes_pod_annotation_prometheus_io_path"]
    regex         = "(.+)"
    target_label  = "__metrics_path__"
  }

  // Swap the port of the discovered address for the annotated port.
  rule {
    action        = "replace"
    source_labels = ["__address__", "__meta_kubernetes_pod_annotation_prometheus_io_port"]
    regex         = "([^:]+)(?::\\d+)?;(\\d+)"
    replacement   = "$1:$2"
    target_label  = "__address__"
  }

  // Expose the namespace as a regular label.
  rule {
    action        = "replace"
    source_labels = ["__meta_kubernetes_namespace"]
    target_label  = "namespace"
  }

  // Expose the pod name as a regular label.
  rule {
    action        = "replace"
    source_labels = ["__meta_kubernetes_pod_name"]
    target_label  = "pod"
  }
}
|
||||||
|
|
||||||
|
// Pod discovery restricted to the flux-system namespace; the relabel stage
// that follows narrows these targets to container port 8080.
discovery.kubernetes "flux_pods" {
  role = "pod"

  namespaces {
    names = ["flux-system"]
  }
}
|
||||||
|
|
||||||
|
// Reduce the flux-system pod targets to the ones on container port 8080 and
// attach namespace/pod labels to the resulting series.
discovery.relabel "flux_scrape" {
  targets = discovery.kubernetes.flux_pods.targets

  // Keep only the targets whose container port is 8080.
  rule {
    action        = "keep"
    source_labels = ["__meta_kubernetes_pod_container_port_number"]
    regex         = "8080"
  }

  // Expose the namespace as a regular label.
  rule {
    action        = "replace"
    source_labels = ["__meta_kubernetes_namespace"]
    target_label  = "namespace"
  }

  // Expose the pod name as a regular label.
  rule {
    action        = "replace"
    source_labels = ["__meta_kubernetes_pod_name"]
    target_label  = "pod"
  }
}
|
||||||
|
|
||||||
|
// kube-state-metrics: one static target, addressed through its Service DNS
// name in the monitoring namespace. Samples go to the Selendis remote write.
prometheus.scrape "kube_state_metrics" {
  scrape_interval = "30s"
  scrape_timeout  = "10s"

  targets = [
    {"__address__" = "kube-state-metrics.monitoring.svc.cluster.local:8080"},
  ]

  forward_to = [prometheus.remote_write.selendis.receiver]
}
|
||||||
|
|
||||||
|
// Flux controllers: scrape the relabelled flux-system pod targets under the
// explicit job name "flux" and forward them to the Selendis remote write.
prometheus.scrape "flux" {
  job_name        = "flux"
  scrape_interval = "30s"
  scrape_timeout  = "10s"

  targets    = discovery.relabel.flux_scrape.output
  forward_to = [prometheus.remote_write.selendis.receiver]
}
|
||||||
|
|
||||||
|
// Scrape node-exporter DaemonSet.
//
// node-exporter runs one pod per node, so every pod has to be scraped
// individually. Going through the Service DNS name would let kube-proxy pick
// an arbitrary pod on each scrape, mixing the metrics of all nodes into a
// single series. Discover the pods instead and scrape each one.
discovery.kubernetes "node_exporter_pods" {
  role = "pod"

  namespaces {
    names = ["monitoring"]
  }

  // Standard Helm chart label; assumes the chart's name is not overridden.
  selectors {
    role  = "pod"
    label = "app.kubernetes.io/name=prometheus-node-exporter"
  }
}

discovery.relabel "node_exporter" {
  targets = discovery.kubernetes.node_exporter_pods.targets

  // Keep only the metrics port (9100) of each pod.
  rule {
    action        = "keep"
    source_labels = ["__meta_kubernetes_pod_container_port_number"]
    regex         = "9100"
  }

  // Record which node each series belongs to.
  rule {
    action        = "replace"
    source_labels = ["__meta_kubernetes_pod_node_name"]
    target_label  = "node"
  }

  rule {
    action        = "replace"
    source_labels = ["__meta_kubernetes_namespace"]
    target_label  = "namespace"
  }

  rule {
    action        = "replace"
    source_labels = ["__meta_kubernetes_pod_name"]
    target_label  = "pod"
  }
}

// Component name is unchanged, so the default job label stays the same.
prometheus.scrape "node_exporter" {
  targets         = discovery.relabel.node_exporter.output
  forward_to      = [prometheus.remote_write.selendis.receiver]
  scrape_interval = "30s"
  scrape_timeout  = "10s"
}
|
||||||
|
|
||||||
|
// Synapse: one static target, addressed through a Service DNS name in the
// matrix namespace on port 9000. Samples go to the Selendis remote write.
prometheus.scrape "synapse" {
  scrape_interval = "30s"
  scrape_timeout  = "10s"

  targets = [
    {"__address__" = "matrix-stack-synapse-main.matrix.svc.cluster.local:9000"},
  ]

  forward_to = [prometheus.remote_write.selendis.receiver]
}
|
||||||
|
|
||||||
|
// Pod logs: read logs for the unfiltered pod discovery targets and hand them
// to the Selendis Loki writer.
loki.source.kubernetes "k8s_logs" {
  forward_to = [loki.write.selendis.receiver]
  targets    = discovery.kubernetes.k8s_pods.targets
}
|
||||||
|
|
||||||
|
// Metrics sink: remote-write every scraped sample to the Prometheus instance
// on Selendis (10.0.0.3:9090).
prometheus.remote_write "selendis" {
  endpoint {
    url = "http://10.0.0.3:9090/api/v1/write"

    // Drop series whose metric name starts with go_ or process_ before
    // they are sent.
    write_relabel_config {
      action        = "drop"
      source_labels = ["__name__"]
      regex         = "go_.*|process_.*"
    }
  }
}
|
||||||
|
|
||||||
|
// Log sink: push collected log entries to the Loki instance on Selendis
// (10.0.0.3:3100).
loki.write "selendis" {
  endpoint {
    url = "http://10.0.0.3:3100/loki/api/v1/push"
  }
}
|
||||||
33
apps/monitoring/alloy.yaml
Normal file
33
apps/monitoring/alloy.yaml
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
---
# Grafana Alloy, installed from the `grafana` HelmRepository in flux-system.
# The pipeline itself lives in the `alloy-config` ConfigMap, which is managed
# outside the chart.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: alloy
  namespace: monitoring
spec:
  interval: 1h
  chart:
    spec:
      chart: alloy
      version: "0.x"
      sourceRef:
        kind: HelmRepository
        name: grafana
        namespace: flux-system
  values:
    alloy:
      configMap:
        # Mount the existing ConfigMap instead of letting the chart render
        # its own default one; `name`/`key` apply when `create` is false.
        create: false
        name: alloy-config
        key: config.alloy
      # Container resources are read from `alloy.resources`; a top-level
      # `resources` key is ignored by the chart.
      resources:
        limits:
          memory: 512Mi
        requests:
          cpu: 100m
          memory: 256Mi
    controller:
      # The chart defaults to a DaemonSet and has no `replicaCount` value.
      # Every pipeline in alloy-config is cluster-wide, so more than one
      # instance would duplicate every sample and log line.
      type: deployment
      replicas: 1
      # Pod annotations belong under `controller`, not at the top level.
      podAnnotations:
        prometheus.io/scrape: "false"
    serviceAccount:
      create: true
      name: alloy
    rbac:
      create: true
|
||||||
18
apps/monitoring/helm-repos.yaml
Normal file
18
apps/monitoring/helm-repos.yaml
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
---
# Chart source for the prometheus-community charts, re-indexed hourly.
# Uses the GA `v1` API: `v1beta2` is deprecated, and any Flux release that
# serves HelmRelease `helm.toolkit.fluxcd.io/v2` also serves HelmRepository v1.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: prometheus-community
  namespace: flux-system
spec:
  interval: 1h
  url: https://prometheus-community.github.io/helm-charts
|
||||||
|
|
||||||
|
---
|
||||||
|
# Chart source for the Grafana charts, re-indexed hourly.
# Uses the GA `v1` API: `v1beta2` is deprecated, and any Flux release that
# serves HelmRelease `helm.toolkit.fluxcd.io/v2` also serves HelmRepository v1.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: grafana
  namespace: flux-system
spec:
  interval: 1h
  url: https://grafana.github.io/helm-charts
|
||||||
22
apps/monitoring/kube-state-metrics.yaml
Normal file
22
apps/monitoring/kube-state-metrics.yaml
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
---
# kube-state-metrics, installed from the prometheus-community HelmRepository
# in flux-system and reconciled hourly.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  namespace: monitoring
  name: kube-state-metrics
spec:
  interval: 1h
  chart:
    spec:
      chart: kube-state-metrics
      version: '5.x'
      sourceRef:
        kind: HelmRepository
        namespace: flux-system
        name: prometheus-community
  values:
    replicas: 1
    # Service port that the Alloy static scrape target points at.
    service:
      port: 8080
    # The chart's Prometheus monitor resource is switched off.
    prometheus:
      monitor:
        enabled: false
|
||||||
9
apps/monitoring/kustomization.yaml
Normal file
9
apps/monitoring/kustomization.yaml
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
---
# kustomize entry point for apps/monitoring: lists every manifest of the
# monitoring stack.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  # Namespace and chart sources
  - namespace.yaml
  - helm-repos.yaml
  # Metric exporters
  - kube-state-metrics.yaml
  - node-exporter.yaml
  # Alloy pipeline config and release
  - alloy-config.yaml
  - alloy.yaml
|
||||||
4
apps/monitoring/namespace.yaml
Normal file
4
apps/monitoring/namespace.yaml
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
---
# Namespace that holds the monitoring stack resources.
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring
|
||||||
29
apps/monitoring/node-exporter.yaml
Normal file
29
apps/monitoring/node-exporter.yaml
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
---
# prometheus-node-exporter, installed from the prometheus-community
# HelmRepository in flux-system and reconciled hourly.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  namespace: monitoring
  name: prometheus-node-exporter
spec:
  interval: 1h
  chart:
    spec:
      chart: prometheus-node-exporter
      version: '4.x'
      sourceRef:
        kind: HelmRepository
        namespace: flux-system
        name: prometheus-community
  values:
    # Host-level access: host network, host PID namespace and root filesystem.
    hostNetwork: true
    hostPID: true
    hostRootFsMount:
      enabled: true
    service:
      port: 9100
      targetPort: 9100
    # The chart's Prometheus monitor resource is switched off.
    prometheus:
      monitor:
        enabled: false
    # Tolerate every NoSchedule taint, whatever its key.
    tolerations:
      - operator: Exists
        effect: NoSchedule
|
||||||
@ -4,4 +4,5 @@ resources:
|
|||||||
- gotk-components.yaml
|
- gotk-components.yaml
|
||||||
- gotk-sync.yaml
|
- gotk-sync.yaml
|
||||||
- infra-sync.yaml
|
- infra-sync.yaml
|
||||||
|
- monitoring-sync.yaml
|
||||||
- production-sync.yaml
|
- production-sync.yaml
|
||||||
|
|||||||
14
clusters/matrix/flux-system/monitoring-sync.yaml
Normal file
14
clusters/matrix/flux-system/monitoring-sync.yaml
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
---
# Flux Kustomization that applies ./apps/monitoring from the flux-system
# GitRepository every 10 minutes, after infra-apps.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  namespace: flux-system
  name: monitoring-apps
spec:
  # Ordering: reconcile only after the infra-apps Kustomization.
  dependsOn:
    - name: infra-apps
  interval: 10m
  sourceRef:
    kind: GitRepository
    name: flux-system
  path: ./apps/monitoring
  # Garbage-collect objects that are removed from the path.
  prune: true
|
||||||
Loading…
x
Reference in New Issue
Block a user