diff --git a/control-operator/api/v1alpha1/task_types.go b/control-operator/api/v1alpha1/task_types.go index ace24d3e..6b6d751c 100644 --- a/control-operator/api/v1alpha1/task_types.go +++ b/control-operator/api/v1alpha1/task_types.go @@ -88,6 +88,13 @@ type TaskSpec struct { NodeName string `json:"nodeName,omitempty"` } +const ( + ConditionPodReady = "PodReady" + ConditionGRPCConnected = "GRPCConnected" + ConditionStateInitialized = "StateInitialized" + ConditionStateTransitioned = "StateTransitioned" +) + // TaskStatus defines the observed state of Task type TaskStatus struct { // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster @@ -95,6 +102,10 @@ type TaskStatus struct { Pod v1.PodStatus `json:"pod,omitempty"` State string `json:"state,omitempty"` Error string `json:"error,omitempty"` + // +listType=map + // +listMapKey=type + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty"` } // +kubebuilder:object:root=true diff --git a/control-operator/api/v1alpha1/zz_generated.deepcopy.go b/control-operator/api/v1alpha1/zz_generated.deepcopy.go index 628dc1b4..695e2f3c 100644 --- a/control-operator/api/v1alpha1/zz_generated.deepcopy.go +++ b/control-operator/api/v1alpha1/zz_generated.deepcopy.go @@ -366,6 +366,13 @@ func (in *TaskSpecControl) DeepCopy() *TaskSpecControl { func (in *TaskStatus) DeepCopyInto(out *TaskStatus) { *out = *in in.Pod.DeepCopyInto(&out.Pod) + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TaskStatus. 
diff --git a/control-operator/config/crd/bases/aliecs.alice.cern_tasks.yaml b/control-operator/config/crd/bases/aliecs.alice.cern_tasks.yaml index 6bef94e6..415cdee8 100644 --- a/control-operator/config/crd/bases/aliecs.alice.cern_tasks.yaml +++ b/control-operator/config/crd/bases/aliecs.alice.cern_tasks.yaml @@ -3931,6 +3931,45 @@ spec: type: object status: properties: + conditions: + items: + properties: + lastTransitionTime: + format: date-time + type: string + message: + maxLength: 32768 + type: string + observedGeneration: + format: int64 + minimum: 0 + type: integer + reason: + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + enum: + - "True" + - "False" + - Unknown + type: string + type: + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map error: type: string pod: diff --git a/control-operator/config/manager/environment-manager/environment-manager.yaml b/control-operator/config/manager/environment-manager/environment-manager.yaml index 19e2a2df..ee2f87d7 100644 --- a/control-operator/config/manager/environment-manager/environment-manager.yaml +++ b/control-operator/config/manager/environment-manager/environment-manager.yaml @@ -34,6 +34,7 @@ spec: - --leader-elect - --health-probe-bind-address=:9080 - --metrics-bind-address=:9081 + - --zap-encoder=json image: environment-manager:latest name: manager securityContext: diff --git a/control-operator/config/manager/task-manager/task-manager.yaml b/control-operator/config/manager/task-manager/task-manager.yaml index 09c538cc..374ae332 100644 --- a/control-operator/config/manager/task-manager/task-manager.yaml +++ b/control-operator/config/manager/task-manager/task-manager.yaml @@ 
-33,6 +33,7 @@ spec: args: - --health-probe-bind-address=:9082 - --metrics-bind-address=:9083 + - --zap-encoder=json env: - name: NODE_NAME valueFrom: diff --git a/control-operator/internal/controller/task_controller.go b/control-operator/internal/controller/task_controller.go index ff3fc21f..76a8b731 100644 --- a/control-operator/internal/controller/task_controller.go +++ b/control-operator/internal/controller/task_controller.go @@ -34,6 +34,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -127,6 +128,9 @@ func (r *TaskReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. if err := r.Status().Update(ctx, t); err != nil { return ctrl.Result{}, err } + if err := r.recordCondition(ctx, t, aliecsv1alpha1.ConditionPodReady, metav1.ConditionFalse, "PodFailed", reason); err != nil { + return ctrl.Result{}, err + } } // Always stop reconciliation if the Pod is in a failed state return ctrl.Result{}, nil @@ -137,6 +141,9 @@ func (r *TaskReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. log.Info("pod doesn't have IP yet, we wait for different event") return ctrl.Result{}, nil } + if err := r.recordCondition(ctx, t, aliecsv1alpha1.ConditionPodReady, metav1.ConditionTrue, "PodRunning", fmt.Sprintf("Pod %s has IP %s", existingPod.Name, existingPod.Status.PodIP)); err != nil { + return ctrl.Result{}, err + } res, err := r.createGRPCConsumer(ctx, t, existingPod, req.NamespacedName, log) if err != nil || !res.IsZero() { return res, err @@ -169,6 +176,9 @@ func (r *TaskReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. 
if err := r.Status().Update(ctx, t); err != nil { return ctrl.Result{}, err } + if err := r.recordCondition(ctx, t, aliecsv1alpha1.ConditionStateInitialized, metav1.ConditionTrue, "StateQueried", fmt.Sprintf("Initial state: %s", t.Status.State)); err != nil { + return ctrl.Result{}, err + } return ctrl.Result{}, nil } @@ -214,6 +224,9 @@ func (r *TaskReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. if err := r.Status().Update(ctx, t); err != nil { return ctrl.Result{}, err } + if err := r.recordCondition(ctx, t, aliecsv1alpha1.ConditionStateTransitioned, metav1.ConditionFalse, "TransitionFailed", fmt.Sprintf("Transition from %s to %s failed: %s", stateReply.GetState(), t.Spec.State, transErr.Error())); err != nil { + return ctrl.Result{}, err + } return ctrl.Result{}, nil } if newState != "" { @@ -232,6 +245,11 @@ func (r *TaskReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. if err := r.Status().Update(ctx, t); err != nil { return ctrl.Result{}, err } + if t.Status.State != oldStatus.State { + if err := r.recordCondition(ctx, t, aliecsv1alpha1.ConditionStateTransitioned, metav1.ConditionTrue, "TransitionComplete", fmt.Sprintf("Transitioned from %s to %s", oldStatus.State, t.Status.State)); err != nil { + return ctrl.Result{}, err + } + } } return ctrl.Result{}, nil @@ -252,6 +270,9 @@ func (r *TaskReconciler) createGRPCConsumer(ctx context.Context, t *aliecsv1alph clientsForContainers[t.Name] = client + if err := r.recordCondition(ctx, t, aliecsv1alpha1.ConditionGRPCConnected, metav1.ConditionTrue, "Connected", fmt.Sprintf("gRPC connection established to %s", addr)); err != nil { + return ctrl.Result{}, err + } return ctrl.Result{}, nil } @@ -385,6 +406,34 @@ func (r *TaskReconciler) SetupWithManager(mgr ctrl.Manager) error { Complete(r) } +// recordCondition sets the condType condition on the Task status, persists it +// with a status merge patch and then emits a matching Kubernetes event (Warning +// when condStatus is False, Normal otherwise). If the condition already carries +// the same status, reason and message, nothing is patched or emitted, so that +// repeated reconciles do not produce duplicate events and API writes. +func (r *TaskReconciler) recordCondition(ctx context.Context, t *aliecsv1alpha1.Task, condType string, condStatus metav1.ConditionStatus, reason, message string) error { + if cur := meta.FindStatusCondition(t.Status.Conditions, condType); cur != nil && cur.Status == condStatus && cur.Reason == reason && cur.Message == message { + return nil + } + patch := 
client.MergeFrom(t.DeepCopy()) + meta.SetStatusCondition(&t.Status.Conditions, metav1.Condition{ + Type: condType, + Status: condStatus, + Reason: reason, + Message: message, + }) + // Announce the change only once it has actually been persisted. + if err := r.Status().Patch(ctx, t, patch); err != nil { + return err + } + eventType := v1.EventTypeNormal + if condStatus == metav1.ConditionFalse { + eventType = v1.EventTypeWarning + } + r.Recorder.Event(t, eventType, reason, message) + return nil +} + func prettyPrint(i any) string { s, err := json.MarshalIndent(i, "", " ") if err != nil { diff --git a/control-operator/k3s-observability/manifests/fluent-bit-audit.yml b/control-operator/k3s-observability/manifests/fluent-bit-audit.yml new file mode 100644 index 00000000..6e438dbe --- /dev/null +++ b/control-operator/k3s-observability/manifests/fluent-bit-audit.yml @@ -0,0 +1,98 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: fluent-bit-audit-config + namespace: kube-system +data: + fluent-bit-audit.yml: | + service: + flush: 5 + log_level: info + + parsers: + - name: audit-json + format: json + time_key: requestReceivedTimestamp + time_format: "%Y-%m-%dT%H:%M:%S.%LZ" + + pipeline: + inputs: + - name: tail + path: /var/log/k3s-audit.log + tag: k8s.audit + parser: audit-json + db: /db/fluent-bit-audit.db + buffer_max_size: 2MB + skip_long_lines: off + + filters: + - name: nest + match: k8s.audit + operation: lift + nested_under: objectRef + add_prefix: objectRef_ + + - name: nest + match: k8s.audit + operation: lift + nested_under: user + add_prefix: user_ + + outputs: + - name: forward + match: k8s.audit + host: ${OPENSEARCH_HOST} + port: ${OPENSEARCH_PORT} +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: fluent-bit-audit + namespace: kube-system +spec: + selector: + matchLabels: + app: fluent-bit-audit + template: + metadata: + labels: + app: fluent-bit-audit + annotations: + reloader.stakater.com/auto: "true" + spec: + nodeSelector: + node-role.kubernetes.io/control-plane: "true" + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists
+ effect: NoSchedule + containers: + - name: fluent-bit + image: fluent/fluent-bit:5.0 + args: + - -c + - /etc/fluent-bit-audit/fluent-bit-audit.yml + envFrom: + - configMapRef: + name: opensearch-config + volumeMounts: + - name: config + mountPath: /etc/fluent-bit-audit/fluent-bit-audit.yml + subPath: fluent-bit-audit.yml + - name: auditlog + mountPath: /var/log/k3s-audit.log + readOnly: true + - name: db + mountPath: /db + volumes: + - name: config + configMap: + name: fluent-bit-audit-config + - name: auditlog + hostPath: + path: /var/log/k3s-audit.log + type: File + - name: db + hostPath: + path: /var/log/fluent-bit-audit + type: DirectoryOrCreate diff --git a/control-operator/k3s-observability/manifests/fluent-bit-events.yml b/control-operator/k3s-observability/manifests/fluent-bit-events.yml new file mode 100644 index 00000000..cdff9507 --- /dev/null +++ b/control-operator/k3s-observability/manifests/fluent-bit-events.yml @@ -0,0 +1,94 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: fluent-bit-events-config + namespace: kube-system +data: + fluent-bit.yml: | + service: + flush: 5 + log_level: info + + pipeline: + inputs: + - name: kubernetes_events + tag: kube.events + kube_url: https://kubernetes.default.svc:443 + kube_ca_file: /var/lib/rancher/k3s/server/tls/server-ca.crt + kube_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + outputs: + - name: forward + match: kube.events + host: ${OPENSEARCH_HOST} + port: ${OPENSEARCH_PORT} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: fluent-bit-events + namespace: kube-system +spec: + replicas: 1 + selector: + matchLabels: + app: fluent-bit-events + template: + metadata: + labels: + app: fluent-bit-events + annotations: + reloader.stakater.com/auto: "true" + spec: + serviceAccountName: fluent-bit-events + containers: + - name: fluent-bit + image: fluent/fluent-bit:5.0 + args: + - -c + - /etc/fluent-bit/fluent-bit.yml + envFrom: + - configMapRef: + name: opensearch-config + 
volumeMounts: + - name: config + mountPath: /etc/fluent-bit/fluent-bit.yml + subPath: fluent-bit.yml + - name: k3s-tls + mountPath: /var/lib/rancher/k3s/server/tls + readOnly: true + volumes: + - name: config + configMap: + name: fluent-bit-events-config + - name: k3s-tls + hostPath: + path: /var/lib/rancher/k3s/server/tls +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: fluent-bit-events + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: fluent-bit-events +rules: +- apiGroups: [""] + resources: [events] + verbs: [get, list, watch] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: fluent-bit-events +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: fluent-bit-events +subjects: +- kind: ServiceAccount + name: fluent-bit-events + namespace: kube-system diff --git a/control-operator/k3s-observability/manifests/fluent-bit-logs.yml b/control-operator/k3s-observability/manifests/fluent-bit-logs.yml new file mode 100644 index 00000000..cd852ea7 --- /dev/null +++ b/control-operator/k3s-observability/manifests/fluent-bit-logs.yml @@ -0,0 +1,123 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: fluent-bit-logs-config + namespace: kube-system +data: + fluent-bit-logs.yml: | + service: + flush: 5 + log_level: info + + pipeline: + inputs: + - name: tail + path: /var/log/containers/*.log + multiline.parser: cri + tag: kube.* + mem_buf_limit: 50MB + skip_long_lines: on + + + filters: + - name: kubernetes + match: kube.* + kube_url: https://kubernetes.default.svc:443 + kube_ca_file: /var/lib/rancher/k3s/server/tls/server-ca.crt + kube_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + labels: on + annotations: off + merge_log: on + + - name: nest + match: kube.* + operation: lift + nested_under: kubernetes + add_prefix: kubernetes. 
+ + - name: modify + match: kube.* + rename: msg log + + outputs: + - name: forward + match: kube.* + host: ${OPENSEARCH_HOST} + port: ${OPENSEARCH_PORT} +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: fluent-bit-logs + namespace: kube-system +spec: + selector: + matchLabels: + app: fluent-bit-logs + template: + metadata: + labels: + app: fluent-bit-logs + annotations: + reloader.stakater.com/auto: "true" + spec: + serviceAccountName: fluent-bit-logs + tolerations: + - operator: Exists + containers: + - name: fluent-bit + image: fluent/fluent-bit:5.0 + args: + - -c + - /etc/fluent-bit-logs/fluent-bit-logs.yml + envFrom: + - configMapRef: + name: opensearch-config + volumeMounts: + - name: config + mountPath: /etc/fluent-bit-logs/fluent-bit-logs.yml + subPath: fluent-bit-logs.yml + - name: varlog + mountPath: /var/log + readOnly: true + - name: k3s-tls + mountPath: /var/lib/rancher/k3s/server/tls + readOnly: true + volumes: + - name: config + configMap: + name: fluent-bit-logs-config + - name: varlog + hostPath: + path: /var/log + - name: k3s-tls + hostPath: + path: /var/lib/rancher/k3s/server/tls +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: fluent-bit-logs + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: fluent-bit-logs +rules: +- apiGroups: [""] + resources: [pods, namespaces] + verbs: [get, list, watch] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: fluent-bit-logs +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: fluent-bit-logs +subjects: +- kind: ServiceAccount + name: fluent-bit-logs + namespace: kube-system diff --git a/control-operator/k3s-observability/manifests/opensearch-config.yml b/control-operator/k3s-observability/manifests/opensearch-config.yml new file mode 100644 index 00000000..883499fc --- /dev/null +++ b/control-operator/k3s-observability/manifests/opensearch-config.yml @@ -0,0 +1,8 
@@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: opensearch-config + namespace: kube-system +data: + OPENSEARCH_HOST: mtichak-opensearch.cern.ch + OPENSEARCH_PORT: "24224" diff --git a/control-operator/k3s-observability/other/audit-policy.yaml b/control-operator/k3s-observability/other/audit-policy.yaml new file mode 100644 index 00000000..b4e8cb46 --- /dev/null +++ b/control-operator/k3s-observability/other/audit-policy.yaml @@ -0,0 +1,27 @@ +apiVersion: audit.k8s.io/v1 +kind: Policy +rules: + # Full request+response for ALIECS custom resources + - level: RequestResponse + omitManagedFields: true + verbs: [create, update, patch, delete] + resources: + - group: aliecs.alice.cern + resources: [environments, tasks, tasktemplates] + + # Pod lifecycle — metadata only (no need for full spec, we have container logs) + - level: Metadata + verbs: [create, delete] + resources: + - group: "" + resources: [pods] + + # Drop noisy non-events + - level: None + resources: + - group: coordination.k8s.io + resources: [leases] + - level: None + users: [system:node:*] + - level: None + nonResourceURLs: [/healthz*, /readyz*, /livez*, /metrics] diff --git a/docs/k3s_observability.md b/docs/k3s_observability.md new file mode 100644 index 00000000..22ad1696 --- /dev/null +++ b/docs/k3s_observability.md @@ -0,0 +1,124 @@ +# K3s Observability + +> ⚠️ **Warning** +> This observability setup is a prototype configured for a specific OpenSearch instance. Adjust `opensearch-config.yml` before deploying to a different environment. 
+ +All manifests and server-side configuration live in `control-operator/k3s-observability/`: + +``` +k3s-observability/ +├── manifests/ # applied via kubectl +│ ├── opensearch-config.yml +│ ├── fluent-bit-events.yml +│ ├── fluent-bit-logs.yml +│ └── fluent-bit-audit.yml +└── other/ # deployed manually on the k3s server node + └── audit-policy.yaml +``` + +## Overview + +Three fluent-bit components run inside the k3s cluster and forward data to an external observability stack via the Fluent Forward protocol (port 24224 on `OPENSEARCH_HOST`): + +| Manifest | Kind | What it collects | +|---|---|---| +| `fluent-bit-events.yml` | Deployment | Kubernetes `Event` objects — pod lifecycle, gRPC connections, controller-emitted events | +| `fluent-bit-logs.yml` | DaemonSet | Container stdout/stderr from all pods | +| `fluent-bit-audit.yml` | DaemonSet (control-plane only) | Kubernetes API audit log — full CRD specs on create/update/delete | + +The external observability stack (Fluent Bit → OTel Collector → Data Prepper → OpenSearch) receives and processes the forwarded data. + +`OPENSEARCH_HOST` and `OPENSEARCH_PORT` in `opensearch-config.yml` point at the observability Fluent Bit forward input, not at OpenSearch directly. All k3s Fluent Bit components read these via `envFrom`. + +[Reloader](https://github.com/stakater/Reloader) can be used to automatically restart any pods whenever their ConfigMap changes including the fluent-bit ones. However it is not required for fluent-bit deployment. Each Deployment/DaemonSet has the annotation `reloader.stakater.com/auto: "true"` on the pod template. + +## Deployment + +### First-time setup + +**1. Configure OpenSearch endpoint** + +Edit `manifests/opensearch-config.yml` with the correct host and port, then apply all manifests: + +```bash +kubectl apply -f control-operator/k3s-observability/manifests/ +``` + +**2. 
Set up audit logging on the k3s server node** + +Copy the audit policy to the server: +```bash +scp control-operator/k3s-observability/other/audit-policy.yaml <server>:/etc/rancher/k3s/audit-policy.yaml +``` + +Add the following to `/etc/rancher/k3s/config.yaml` on the server (create the file if it doesn't exist): +```yaml +kube-apiserver-arg: + - "audit-log-path=/var/log/k3s-audit.log" + - "audit-policy-file=/etc/rancher/k3s/audit-policy.yaml" + - "audit-log-maxage=7" + - "audit-log-maxbackup=3" + - "audit-log-maxsize=100" +``` + +Restart k3s. If leftover containerd-shim processes block the restart: +```bash +/usr/local/bin/k3s-killall.sh && systemctl start k3s +``` + +**(OPTIONAL) 3. Install Reloader** +```bash +kubectl apply -f https://raw.githubusercontent.com/stakater/Reloader/master/deployments/kubernetes/reloader.yaml +``` + +### Updating config + +After any change to the manifests: +```bash +kubectl apply -f control-operator/k3s-observability/manifests/ +``` + +If Reloader is installed, it will automatically restart affected pods when their ConfigMap changes. + +## What is recorded and where + +### Kubernetes Events (`fluent-bit-events`) + +Watches the Kubernetes `Event` API directly. Captures events emitted by kubelet and the ALIECS controllers: + +- Pod lifecycle: `Created`, `Started`, `Killing` (explicit kill), `BackOff` (crash loop) +- Task controller: pod IP assignment, gRPC connection established, pod failure detected +- Notable gap: containers that exit on their own do not generate a kubelet `Killing` event — their exit is only visible in pod status. The task controller emits a `PodFailed` event to fill this gap. + +Query in OpenSearch: `WHERE attributes.kind = 'Event'` + +### Container logs (`fluent-bit-logs`) + +Tails `/var/log/containers/*.log` on every node. Captures stdout/stderr from all containers including the task and environment managers. + +The ALIECS controllers are configured with `--zap-encoder=json` so their log lines are pure JSON.
The fluent-bit `merge_log: on` option parses these automatically, lifting structured fields as queryable attributes. The OTel Collector further normalises controller logs — including mapping the Go `level` field (`debug`/`info`/`warn`/`error`) to OTLP `severity_text` and `severity_number` so that log level filtering works correctly in OpenSearch Dashboards. + +### Audit log (`fluent-bit-audit`) + +Tails `/var/log/k3s-audit.log` on the control-plane node. Records every API server interaction matching the audit policy. + +**What is captured:** + +| Resource | Level | Verbs | +|---|---|---| +| ALIECS CRDs (Task, Environment, TaskTemplate) | `RequestResponse` (full spec) | create, update, patch, delete | +| Pods | `Metadata` (no body) | create, delete | + +`RequestResponse` means the full request and response body is logged — i.e. the complete spec of every Task and Environment CRD at the time it was created or modified. This gives a persistent record of what was deployed even after the CRD is deleted. + +`managedFields` is stripped at source via `omitManagedFields: true` in the audit policy. This field uses `.` as a JSON key (Kubernetes FieldsV1 format), which OpenSearch rejects. Removing it at the kube-apiserver level is cleaner than filtering it in the pipeline. + +Pod deletion (which sets the pod to Terminating) is captured at `Metadata` level via `verb: delete`. + +What is **not** captured: pod status transitions (Running → Terminating → Succeeded/Failed) — these are `patch` operations on the Pod object and are excluded to avoid noise. + +## Audit policy + +The audit policy at `other/audit-policy.yaml` is a server-side file read by the kube-apiserver at startup — it is **not** a Kubernetes resource and cannot be applied with `kubectl`. Any change to it requires copying the file to the server and restarting k3s. + +Noise excluded by policy: lease updates, node heartbeats, health/metrics endpoints.
`managedFields` is stripped from the ALIECS CRD events via `omitManagedFields: true` on that rule; Pod events are logged at `Metadata` level and therefore carry no object body.