Add WB monitoring stack

This commit is contained in:
Kochetkov S 2026-05-25 13:18:25 +03:00
parent 3f5fd12152
commit bb6a2e4ef1
41 changed files with 809 additions and 0 deletions

View File

@ -0,0 +1,12 @@
---
# Flux Helm source: OCI chart repository on cr.yandex, polled every 10 minutes.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: yc-oci-charts
  namespace: flux-system
spec:
  type: oci
  interval: 10m0s
  url: oci://cr.yandex/crp3ccidau046kdj8g9q/charts
  # Name of the Secret that carries the registry credentials.
  secretRef:
    name: yc-cr-auth

View File

@ -0,0 +1,77 @@
---
# Cluster overlay: pulls in the shared infrastructure components and applies
# one per-component patch to each component's HelmRelease.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - ../../../infrastructure/glitchtip
  - ../../../infrastructure/openobserve
  - ../../../infrastructure/vmstack
  - ../../../infrastructure/prometheus-stack
  - ../../../infrastructure/opentelemetry-operator
  - ../../../infrastructure/opentelemetry-collector
  - ../../../infrastructure/goalert
  - ../../../infrastructure/kafka-exporter
  - ../../../infrastructure/postgres-exporter
  - ./secrets.yaml

# Every patch targets exactly one HelmRelease, selected by name and namespace.
patches:
  - path: ./patches/glitchtip.yaml
    target:
      group: helm.toolkit.fluxcd.io
      version: v2
      kind: HelmRelease
      name: glitchtip
      namespace: glitchtip

  - path: ./patches/openobserve.yaml
    target:
      group: helm.toolkit.fluxcd.io
      version: v2
      kind: HelmRelease
      name: openobserve
      namespace: openobserve

  - path: ./patches/vmstack.yaml
    target:
      group: helm.toolkit.fluxcd.io
      version: v2
      kind: HelmRelease
      name: vmstack
      namespace: vmstack

  - path: ./patches/prometheus-stack.yaml
    target:
      group: helm.toolkit.fluxcd.io
      version: v2
      kind: HelmRelease
      name: prometheus-stack
      namespace: prometheus-stack

  - path: ./patches/opentelemetry-operator.yaml
    target:
      group: helm.toolkit.fluxcd.io
      version: v2
      kind: HelmRelease
      name: opentelemetry-operator
      namespace: opentelemetry-operator

  - path: ./patches/opentelemetry-collector.yaml
    target:
      group: helm.toolkit.fluxcd.io
      version: v2
      kind: HelmRelease
      name: opentelemetry-collector
      namespace: opentelemetry-collector

  - path: ./patches/goalert.yaml
    target:
      group: helm.toolkit.fluxcd.io
      version: v2
      kind: HelmRelease
      name: goalert
      namespace: goalert

  - path: ./patches/kafka-exporter.yaml
    target:
      group: helm.toolkit.fluxcd.io
      version: v2
      kind: HelmRelease
      name: kafka-exporter
      namespace: kafka-exporter

  - path: ./patches/postgres-exporter.yaml
    target:
      group: helm.toolkit.fluxcd.io
      version: v2
      kind: HelmRelease
      name: postgres-exporter
      namespace: postgres-exporter

View File

@ -0,0 +1,53 @@
# Overlay patch for the glitchtip HelmRelease: sets the reconcile interval and
# timeout and supplies chart values for a "web" and a "worker" service.
# NOTE(review): indentation is not visible in this view, so the nesting of the
# values below is left exactly as found — confirm it against the chart's
# values schema before relying on it.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: glitchtip
namespace: glitchtip
spec:
interval: 5m
timeout: 10m
values:
universal-chart:
services:
# Web service: one replica, SERVER_ROLE=web.
web:
deployment:
replicaCount:
_default: 1
envs:
- name: SERVER_ROLE
value:
_default: web
- name: PORT
value:
_default: "8000"
# The same domain is repeated for the worker and in the env section below;
# keep all three in sync.
- name: GLITCHTIP_DOMAIN
value:
_default: https://glitchtip-srx.wb.ru
- name: ENABLE_OPEN_USER_REGISTRATION
value:
_default: "false"
# Worker service: one replica, SERVER_ROLE=worker.
worker:
deployment:
replicaCount:
_default: 1
envs:
- name: SERVER_ROLE
value:
_default: worker
- name: PORT
value:
_default: "8000"
- name: GLITCHTIP_DOMAIN
value:
_default: https://glitchtip-srx.wb.ru
glitchtip:
# The chart does not create the secret; it refers to glitchtip-secret by name.
secret:
create: false
name: glitchtip-secret
migrate:
enabled: true
env:
PORT: "8000"
GLITCHTIP_DOMAIN: https://glitchtip-srx.wb.ru
vault:
enabled: false

View File

@ -0,0 +1,41 @@
# Overlay patch for the goalert HelmRelease: image, secret wiring, the
# postgresql section, and the ingress.
# NOTE(review): indentation is not visible in this view and this chart's values
# schema is not shown, so the original line layout is kept as found.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: goalert
namespace: goalert
spec:
interval: 5m
timeout: 10m
values:
image:
name: cr.yandex/crp3ccidau046kdj8g9q/goalert
tag: 0.32.0
pullPolicy: IfNotPresent
goalert:
# Connection URL and encryption key are read from goalert-secret.
existingSecret:
name: goalert-secret
keys:
GOALERT_DB_URL: GOALERT_DB_URL
GOALERT_DATA_ENCRYPTION_KEY: GOALERT_DATA_ENCRYPTION_KEY
environment:
GOALERT_HTTP_PREFIX: ""
postgresql:
enabled: true
auth:
database: goalert
username: goalert
# NOTE(review): database password is committed in plaintext here and must
# stay identical to the one embedded in GOALERT_DB_URL in goalert-secret.
password: uO/9PHoznDImghirkrNicFQ3EwFmUX0s
sslmode: disable
primary:
persistence:
enabled: true
storageClass: local-path
size: 10Gi
ingress:
enabled: true
className: nginx
hosts:
# Previously vmalert-srx.wb.ru, which is also the vmalert ingress host (same
# nginx class, same "/" path) — two ingresses cannot both own that route.
# NOTE(review): goalert-srx.wb.ru follows the naming of the other hosts;
# confirm the DNS record exists.
- host: goalert-srx.wb.ru
paths:
- path: /
pathType: Prefix

View File

@ -0,0 +1,26 @@
---
# Overlay patch for the kafka-exporter HelmRelease: image, Kafka bootstrap
# servers, and the ServiceMonitor.
# NOTE(review): nesting reconstructed from a view without indentation —
# confirm against the chart's values.yaml.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kafka-exporter
  namespace: kafka-exporter
spec:
  interval: 5m
  timeout: 10m
  values:
    image:
      repository: cr.yandex/crp3ccidau046kdj8g9q/kafka-exporter
      # TODO: pin an immutable version tag (or digest) instead of "latest".
      tag: latest
      # "latest" is a mutable tag; with IfNotPresent each node keeps whatever
      # build it cached first, so pods can run different versions. Always
      # re-resolves the tag on every pod start until a fixed tag is pinned.
      pullPolicy: Always
    kafkaExporter:
      kafka:
        servers:
          - kafka.kafka.svc.cluster.local:9092
      # Plain, unauthenticated connection to the brokers.
      sasl:
        enabled: false
      tls:
        enabled: false
    prometheus:
      serviceMonitor:
        enabled: true
        namespace: kafka-exporter
        interval: 30s

View File

@ -0,0 +1,35 @@
# Overlay patch for the openobserve HelmRelease: sets the reconcile interval
# and timeout and supplies chart values for a single "openobserve" service.
# NOTE(review): indentation is not visible in this view, so the nesting of the
# values below is left exactly as found — confirm it against the chart's
# values schema before relying on it.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: openobserve
namespace: openobserve
spec:
interval: 5m
timeout: 10m
values:
universal-chart:
services:
openobserve:
deployment:
replicaCount:
_default: 1
envs:
# 5080 is also the port the opentelemetry-collector exporter endpoint uses.
- name: ZO_HTTP_PORT
value:
_default: "5080"
- name: ZO_LOCAL_MODE
value:
_default: "true"
- name: ZO_TELEMETRY
value:
_default: "false"
openobserve:
# The chart does not create the secret; it refers to openobserve-secret by name.
secret:
create: false
name: openobserve-secret
nats:
enabled: false
otelCollector:
enabled: false
vault:
enabled: false

View File

@ -0,0 +1,53 @@
---
# Overlay patch for the opentelemetry-collector HelmRelease: daemonset mode,
# pod log files read via filelog and exported to OpenObserve over OTLP/HTTP.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: opentelemetry-collector
  namespace: opentelemetry-collector
spec:
  interval: 5m
  timeout: 10m
  values:
    mode: daemonset
    fullnameOverride: otel-collector

    presets:
      logsCollection:
        enabled: true
        includeCollectorLogs: false
      kubernetesAttributes:
        enabled: true

    config:
      receivers:
        filelog:
          include:
            - /var/log/pods/*/*/*.log
          # Leave out log files of pods in the opentelemetry-collector namespace.
          exclude:
            - /var/log/pods/opentelemetry-collector_*/*/*.log
          start_at: end
          operators:
            - type: container
        # Defined here, but the only pipeline below (logs) does not list it.
        otlp:
          protocols:
            grpc:
              endpoint: ${env:MY_POD_IP}:4317
            http:
              endpoint: ${env:MY_POD_IP}:4318

      processors:
        batch: {}
        k8sattributes: {}

      exporters:
        otlphttp/openobserve:
          endpoint: http://openobserve.openobserve.svc.cluster.local:5080/api/default
          headers:
            # NOTE(review): static Basic credential committed in plaintext in
            # the release values — consider sourcing it from a Secret.
            Authorization: Basic YWRtaW5AZ3JhZmFuYS1zcngud2IucnU6NERoc2ZobnBvNTRIQkZkKzFUMzZkUDFUUUhJa3NjWDU=
          tls:
            insecure: true

      service:
        pipelines:
          logs:
            receivers: [filelog]
            processors: [k8sattributes, batch]
            exporters: [otlphttp/openobserve]

View File

@ -0,0 +1,17 @@
---
# Overlay patch for the opentelemetry-operator HelmRelease: collector image
# repository and admission-webhook certificate handling.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: opentelemetry-operator
  namespace: opentelemetry-operator
spec:
  interval: 5m
  timeout: 10m
  values:
    manager:
      collectorImage:
        repository: cr.yandex/crp3ccidau046kdj8g9q/opentelemetry-collector
    # cert-manager integration is off; the auto-generated certificate is used.
    admissionWebhooks:
      certManager:
        enabled: false
      autoGenerateCert:
        enabled: true

View File

@ -0,0 +1,25 @@
---
# Overlay patch for the postgres-exporter HelmRelease: image, ServiceMonitor,
# and the PostgreSQL datasource.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: postgres-exporter
  namespace: postgres-exporter
spec:
  interval: 5m
  timeout: 10m
  values:
    image:
      name: cr.yandex/crp3ccidau046kdj8g9q/postgres-exporter:v0.18.1
      pullPolicy: IfNotPresent

    # NOTE(review): ServiceMonitor needs its CRD to exist before this release
    # installs; no dependsOn is declared for this release — confirm ordering.
    serviceMonitor:
      enabled: true
      namespace: postgres-exporter

    config:
      # NOTE(review): both discrete datasource fields and datasourceSecret are
      # set; presumably the secret's DSN is the one used — verify in the chart.
      datasource:
        host: postgresql.postgresql.svc.cluster.local
        user: postgres
        port: "5432"
        database: postgres
        sslmode: disable
      datasourceSecret:
        name: postgres-exporter-secret
        key: data_source_name

View File

@ -0,0 +1,60 @@
---
# Overlay patch for the prometheus-stack HelmRelease. Prometheus and
# Alertmanager are disabled; the release provides the operator, CRDs, default
# rules, and Grafana with VictoriaMetrics as its datasource.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: prometheus-stack
  namespace: prometheus-stack
spec:
  interval: 5m
  timeout: 20m
  values:
    fullnameOverride: prometheus-stack
    crds:
      enabled: true
    defaultRules:
      create: true
    alertmanager:
      enabled: false
    prometheus:
      enabled: false
    prometheusOperator:
      enabled: true
    # kube-state-metrics and node-exporter are already enabled in the vmstack
    # release. A second node-exporter DaemonSet competes for the same host
    # port, and a second kube-state-metrics only duplicates series, so both
    # are turned off here.
    kubeStateMetrics:
      enabled: false
    nodeExporter:
      enabled: false
    grafana:
      enabled: true
      # Admin credentials come from the grafana-admin Secret.
      admin:
        existingSecret: grafana-admin
        userKey: admin-user
        passwordKey: admin-password
      persistence:
        enabled: true
        type: sts
        storageClassName: local-path
        accessModes:
          - ReadWriteOnce
        size: 20Gi
      ingress:
        enabled: true
        ingressClassName: nginx
        hosts:
          - grafana-srx.wb.ru
        path: /
      env:
        GF_SERVER_DOMAIN: grafana-srx.wb.ru
        GF_SERVER_ROOT_URL: https://grafana-srx.wb.ru/
      sidecar:
        dashboards:
          enabled: true
          searchNamespace: ALL
          label: grafana_dashboard
          labelValue: "1"
        datasources:
          enabled: true
          # The chart's built-in Prometheus datasource is provisioned as the
          # default; together with VictoriaMetrics (isDefault: true) below that
          # yields two defaults, which Grafana rejects at provisioning time.
          # Prometheus is disabled in this release anyway, so drop the built-in.
          defaultDatasourceEnabled: false
      additionalDataSources:
        - name: VictoriaMetrics
          type: prometheus
          access: proxy
          isDefault: true
          url: http://vmsingle-vmstack.vmstack.svc.cluster.local:8428

View File

@ -0,0 +1,73 @@
---
# Overlay patch for the vmstack HelmRelease: single-node VictoriaMetrics,
# vmagent, vmalert with an ingress, and Alertmanager routing to GoAlert.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: vmstack
  namespace: vmstack
spec:
  interval: 5m
  timeout: 20m
  values:
    global:
      # NOTE(review): confirm whether the chart treats clusterLabel as a label
      # name or as the cluster's name.
      clusterLabel: wb
    nameOverride: vmstack
    fullnameOverride: vmstack

    defaultRules:
      create: true

    vmsingle:
      enabled: true
      spec:
        retentionPeriod: 1w
        replicaCount: 1
        storage:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 100Gi
          storageClassName: local-path

    vmcluster:
      enabled: false

    vmagent:
      enabled: true

    vmalert:
      enabled: true
      spec:
        evaluationInterval: 30s
        extraArgs:
          external.url: https://vmalert-srx.wb.ru
      ingress:
        enabled: true
        ingressClassName: nginx
        hosts:
          - vmalert-srx.wb.ru
        path: /

    alertmanager:
      enabled: true
      spec:
        replicaCount: 1
        # NOTE(review): this is the vmalert host, not an Alertmanager-specific
        # one — confirm it is intended.
        externalURL: https://vmalert-srx.wb.ru
        volumes: []
        volumeMounts: []
      config:
        global:
          resolve_timeout: 5m
        # Single route: every alert goes to the goalert receiver.
        route:
          receiver: goalert
          group_by:
            - alertname
            - namespace
            - pod
          group_wait: 30s
          group_interval: 5m
          repeat_interval: 6h
        receivers:
          - name: goalert
            webhook_configs:
              # NOTE(review): no integration token is present in this URL —
              # verify GoAlert accepts the webhook without one.
              - url: http://goalert.goalert.svc.cluster.local:8081/api/v2/prometheusalertmanager/incoming
                send_resolved: true

    kube-state-metrics:
      enabled: true
    prometheus-node-exporter:
      enabled: true
    grafana:
      enabled: false

View File

@ -0,0 +1,65 @@
---
# Secrets consumed by the monitoring HelmReleases.
# SECURITY NOTE(review): every value in this file is committed in plaintext.
# Rotate these credentials and move them to an encrypted or external secret
# mechanism.
apiVersion: v1
kind: Secret
metadata:
  name: grafana-admin
  namespace: prometheus-stack
type: Opaque
stringData:
  admin-user: grafana-admin
  admin-password: zTaC8vmiQ8f0hit7JpmxGePAwxkizGli
---
# NOTE(review): REDIS_URL points at localhost and DATABASE_URL at a local
# SQLite file, while the glitchtip patch defines separate web and worker
# services — confirm both can reach the same Redis and database.
apiVersion: v1
kind: Secret
metadata:
  name: glitchtip-secret
  namespace: glitchtip
type: Opaque
stringData:
  SECRET_KEY: xgMg/IOVYgbzTbyeXtCrKwd90ytbKhal
  DATABASE_URL: sqlite:////data/glitchtip.sqlite3
  REDIS_URL: redis://localhost:6379/0
  EMAIL_URL: consolemail://
  DEFAULT_FROM_EMAIL: glitchtip@grafana-srx.wb.ru
  GLITCHTIP_DOMAIN: https://glitchtip-srx.wb.ru
  ENABLE_OPEN_USER_REGISTRATION: "false"
---
apiVersion: v1
kind: Secret
metadata:
  name: openobserve-secret
  namespace: openobserve
type: Opaque
stringData:
  ZO_ROOT_USER_EMAIL: admin@grafana-srx.wb.ru
  ZO_ROOT_USER_PASSWORD: 4Dhsfhnpo54HBFd+1T36dP1TQHIkscX5
  ZO_LOCAL_MODE: "true"
---
apiVersion: v1
kind: Secret
metadata:
  name: goalert-secret
  namespace: goalert
type: Opaque
stringData:
  # The password contains "/", which must be percent-encoded (%2F) inside the
  # URL's userinfo; left raw, a URI parser ends the authority at that slash
  # and the connection string is rejected.
  GOALERT_DB_URL: postgres://goalert:uO%2F9PHoznDImghirkrNicFQ3EwFmUX0s@goalert-postgresql:5432/goalert?sslmode=disable
  GOALERT_DATA_ENCRYPTION_KEY: 0f92e3881fb83d26d216c6cc772ea3249b52c038ddbdad5bef48f716fa871464
---
# Empty credentials; the kafka-exporter patch has SASL disabled.
apiVersion: v1
kind: Secret
metadata:
  name: kafka-exporter-secret
  namespace: kafka-exporter
type: Opaque
stringData:
  username: ""
  password: ""
---
apiVersion: v1
kind: Secret
metadata:
  name: postgres-exporter-secret
  namespace: postgres-exporter
type: Opaque
stringData:
  # The password contains "/" and "+"; both are percent-encoded (%2F, %2B) so
  # the URI parses and decodes back to the exact password.
  data_source_name: postgresql://postgres:XbJdnxqiU2aAKWjVfn2Lvd0B%2FmFya%2Bce@postgresql.postgresql.svc.cluster.local:5432/postgres?sslmode=disable

View File

@ -0,0 +1,6 @@
---
# Cluster entry point: Flux system, Helm repositories, and infrastructure.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ./flux-system
  - ./helm-repositories.yaml
  - ./infrastructure

View File

@ -0,0 +1,22 @@
---
# Base HelmRelease for goalert: chart 0.32.0 from the yc-oci-charts repository.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: goalert
  namespace: goalert
spec:
  interval: 10m
  chart:
    spec:
      chart: goalert
      version: "0.32.0"
      sourceRef:
        kind: HelmRepository
        name: yc-oci-charts
        namespace: flux-system
      interval: 10m
  # Failed installs and upgrades are each retried up to three times.
  install:
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3

View File

@ -0,0 +1,6 @@
---
# goalert base: namespace plus HelmRelease, all placed in the goalert namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: goalert
resources:
  - namespace.yaml
  - helmrelease.yaml

View File

@ -0,0 +1,6 @@
---
# Namespace for goalert, labelled monitoring=enabled.
apiVersion: v1
kind: Namespace
metadata:
  name: goalert
  labels:
    monitoring: enabled

View File

@ -0,0 +1,4 @@
---
# Component entry point; delegates to the base directory.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - base

View File

@ -0,0 +1,22 @@
---
# Base HelmRelease for kafka-exporter: chart kafka-exporter-prod 0.27.0 from
# the yc-oci-charts repository.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kafka-exporter
  namespace: kafka-exporter
spec:
  interval: 10m
  chart:
    spec:
      chart: kafka-exporter-prod
      version: "0.27.0"
      sourceRef:
        kind: HelmRepository
        name: yc-oci-charts
        namespace: flux-system
      interval: 10m
  # Failed installs and upgrades are each retried up to three times.
  install:
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3

View File

@ -0,0 +1,6 @@
---
# kafka-exporter base: namespace plus HelmRelease, all placed in the
# kafka-exporter namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: kafka-exporter
resources:
  - namespace.yaml
  - helmrelease.yaml

View File

@ -0,0 +1,6 @@
---
# Namespace for kafka-exporter, labelled monitoring=enabled.
apiVersion: v1
kind: Namespace
metadata:
  name: kafka-exporter
  labels:
    monitoring: enabled

View File

@ -0,0 +1,4 @@
---
# Component entry point; delegates to the base directory.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - base

View File

@ -0,0 +1,22 @@
---
# Base HelmRelease for opentelemetry-collector: chart
# opentelemetry-collector-prod 0.117.1 from the yc-oci-charts repository.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: opentelemetry-collector
  namespace: opentelemetry-collector
spec:
  interval: 10m
  chart:
    spec:
      chart: opentelemetry-collector-prod
      version: "0.117.1"
      sourceRef:
        kind: HelmRepository
        name: yc-oci-charts
        namespace: flux-system
      interval: 10m
  # Failed installs and upgrades are each retried up to three times.
  install:
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3

View File

@ -0,0 +1,6 @@
---
# opentelemetry-collector base: namespace plus HelmRelease, all placed in the
# opentelemetry-collector namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: opentelemetry-collector
resources:
  - namespace.yaml
  - helmrelease.yaml

View File

@ -0,0 +1,6 @@
---
# Namespace for opentelemetry-collector, labelled monitoring=enabled.
apiVersion: v1
kind: Namespace
metadata:
  name: opentelemetry-collector
  labels:
    monitoring: enabled

View File

@ -0,0 +1,4 @@
---
# Component entry point; delegates to the base directory.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - base

View File

@ -0,0 +1,22 @@
---
# Base HelmRelease for opentelemetry-operator: chart
# opentelemetry-operator-preprod 0.81.1 from the yc-oci-charts repository.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: opentelemetry-operator
  namespace: opentelemetry-operator
spec:
  interval: 10m
  chart:
    spec:
      # NOTE(review): this is the only chart with a "-preprod" suffix; the
      # sibling releases use "-prod" or no suffix — confirm it is intended.
      chart: opentelemetry-operator-preprod
      version: "0.81.1"
      sourceRef:
        kind: HelmRepository
        name: yc-oci-charts
        namespace: flux-system
      interval: 10m
  # Failed installs and upgrades are each retried up to three times.
  install:
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3

View File

@ -0,0 +1,6 @@
---
# opentelemetry-operator base: namespace plus HelmRelease, all placed in the
# opentelemetry-operator namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: opentelemetry-operator
resources:
  - namespace.yaml
  - helmrelease.yaml

View File

@ -0,0 +1,6 @@
---
# Namespace for opentelemetry-operator, labelled monitoring=enabled.
apiVersion: v1
kind: Namespace
metadata:
  name: opentelemetry-operator
  labels:
    monitoring: enabled

View File

@ -0,0 +1,4 @@
---
# Component entry point; delegates to the base directory.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - base

View File

@ -0,0 +1,22 @@
---
# Base HelmRelease for postgres-exporter: chart postgres-exporter 0.0.1-prod
# from the yc-oci-charts repository.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: postgres-exporter
  namespace: postgres-exporter
spec:
  interval: 10m
  chart:
    spec:
      chart: postgres-exporter
      version: "0.0.1-prod"
      sourceRef:
        kind: HelmRepository
        name: yc-oci-charts
        namespace: flux-system
      interval: 10m
  # Failed installs and upgrades are each retried up to three times.
  install:
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3

View File

@ -0,0 +1,6 @@
---
# postgres-exporter base: namespace plus HelmRelease, all placed in the
# postgres-exporter namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: postgres-exporter
resources:
  - namespace.yaml
  - helmrelease.yaml

View File

@ -0,0 +1,6 @@
---
# Namespace for postgres-exporter, labelled monitoring=enabled.
apiVersion: v1
kind: Namespace
metadata:
  name: postgres-exporter
  labels:
    monitoring: enabled

View File

@ -0,0 +1,4 @@
---
# Component entry point; delegates to the base directory.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - base

View File

@ -0,0 +1,22 @@
---
# Base HelmRelease for prometheus-stack: chart prometheus-stack-prod 71.2.0
# from the yc-oci-charts repository.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: prometheus-stack
  namespace: prometheus-stack
spec:
  interval: 10m
  chart:
    spec:
      chart: prometheus-stack-prod
      version: "71.2.0"
      sourceRef:
        kind: HelmRepository
        name: yc-oci-charts
        namespace: flux-system
      interval: 10m
  # Failed installs and upgrades are each retried up to three times.
  install:
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3

View File

@ -0,0 +1,6 @@
---
# prometheus-stack base: namespace plus HelmRelease, all placed in the
# prometheus-stack namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: prometheus-stack
resources:
  - namespace.yaml
  - helmrelease.yaml

View File

@ -0,0 +1,6 @@
---
# Namespace for prometheus-stack, labelled monitoring=enabled.
apiVersion: v1
kind: Namespace
metadata:
  name: prometheus-stack
  labels:
    monitoring: enabled

View File

@ -0,0 +1,4 @@
---
# Component entry point; delegates to the base directory.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - base

View File

@ -0,0 +1,22 @@
---
# Base HelmRelease for vmstack: chart victoria-metrics-k8s-stack 0.63.2 from
# the yc-oci-charts repository.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: vmstack
  namespace: vmstack
spec:
  interval: 10m
  chart:
    spec:
      chart: victoria-metrics-k8s-stack
      version: "0.63.2"
      sourceRef:
        kind: HelmRepository
        name: yc-oci-charts
        namespace: flux-system
      interval: 10m
  # Failed installs and upgrades are each retried up to three times.
  install:
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3

View File

@ -0,0 +1,6 @@
---
# vmstack base: namespace plus HelmRelease, all placed in the vmstack namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: vmstack
resources:
  - namespace.yaml
  - helmrelease.yaml

View File

@ -0,0 +1,6 @@
---
# Namespace for vmstack, labelled monitoring=enabled.
apiVersion: v1
kind: Namespace
metadata:
  name: vmstack
  labels:
    monitoring: enabled

View File

@ -0,0 +1,4 @@
---
# Component entry point; delegates to the base directory.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - base