Merge pull request #1113 from dod-ccpo/k8s-log-aggregation

K8s log aggregation
This commit is contained in:
dandds 2019-10-14 13:15:41 -04:00 committed by GitHub
commit 73a459ea28
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 513 additions and 0 deletions

View File

@ -30,6 +30,50 @@ envsubst < deploy/aws/storage-class.yml | kubectl apply -f -
When applying configuration changes, be careful not to over-write the storage class configuration without the environment variable substituted.
#### Fluentd Configuration
For the Fluentd/CloudWatch integration to work for logging purposes, you will need to add an additional policy to the worker nodes' role. What follows is adapted from the [EKS Workshop](https://eksworkshop.com/logging/prereqs/).
If you used eksctl to provision the EKS cluster, there will be a CloudFormation stack associated with the cluster. The node instances within the cluster will have a role associated to define their permissions. You need the name of the role. To get it using the AWS CLI, run:
```
export ROLE_NAME=$(aws --profile=dds --region us-east-2 cloudformation describe-stacks --stack-name eksctl-atat-nodegroup-standard-workers | jq -r '.Stacks[].Outputs[] | select(.OutputKey=="InstanceRoleARN") | .OutputValue' | cut -f2 -d/)
```
(This assumes that you have [`jq`](https://stedolan.github.io/jq/) available to parse the JSON response.)
Run `echo $ROLE_NAME` to check that the previous command worked.
Create a file called `k8s-logs-policy.json` and add the following content:
```
{
"Version": "2012-10-17",
"Statement": [
{
"Action": [
"logs:DescribeLogGroups",
"logs:DescribeLogStreams",
"logs:CreateLogGroup",
"logs:CreateLogStream",
"logs:PutLogEvents"
],
"Resource": "*",
"Effect": "Allow"
}
]
}
```
This is the new policy that allows the nodes to aggregate logs. To apply it, run the following with the AWS CLI:
```
aws iam put-role-policy --role-name $ROLE_NAME --policy-name Logs-Policy-For-Worker --policy-document file://./k8s-logs-policy.json
```
(This command assumes you are executing it in the same directory as the policy JSON file; adjust the path as needed.)
### Apply the config to an Azure cluster
To apply the configuration to a new cluster, run:

View File

@ -0,0 +1,8 @@
apiVersion: v1
data:
cluster.name: atat
logs.region: us-east-2
kind: ConfigMap
metadata:
name: cluster-info
namespace: amazon-cloudwatch

View File

@ -0,0 +1,7 @@
# create amazon-cloudwatch namespace
apiVersion: v1
kind: Namespace
metadata:
name: amazon-cloudwatch
labels:
name: amazon-cloudwatch

433
deploy/aws/fluentd.yaml Normal file
View File

@ -0,0 +1,433 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: fluentd
namespace: amazon-cloudwatch
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
name: fluentd-role
rules:
- apiGroups: [""]
resources:
- namespaces
- pods
- pods/logs
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: fluentd-role-binding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: fluentd-role
subjects:
- kind: ServiceAccount
name: fluentd
namespace: amazon-cloudwatch
---
apiVersion: v1
kind: ConfigMap
metadata:
name: fluentd-config
namespace: amazon-cloudwatch
labels:
k8s-app: fluentd-cloudwatch
data:
fluent.conf: |
@include containers.conf
@include systemd.conf
@include host.conf
<match fluent.**>
@type null
</match>
containers.conf: |
<source>
@type tail
@id in_tail_container_logs
@label @containers
path /var/log/containers/*.log
exclude_path ["/var/log/containers/cloudwatch-agent*", "/var/log/containers/fluentd*"]
pos_file /var/log/fluentd-containers.log.pos
tag *
read_from_head true
<parse>
@type json
time_format %Y-%m-%dT%H:%M:%S.%NZ
</parse>
</source>
<source>
@type tail
@id in_tail_cwagent_logs
@label @cwagentlogs
path /var/log/containers/cloudwatch-agent*
pos_file /var/log/cloudwatch-agent.log.pos
tag *
read_from_head true
<parse>
@type json
time_format %Y-%m-%dT%H:%M:%S.%NZ
</parse>
</source>
<source>
@type tail
@id in_tail_fluentd_logs
@label @fluentdlogs
path /var/log/containers/fluentd*
pos_file /var/log/fluentd.log.pos
tag *
read_from_head true
<parse>
@type json
time_format %Y-%m-%dT%H:%M:%S.%NZ
</parse>
</source>
<label @fluentdlogs>
<filter **>
@type kubernetes_metadata
@id filter_kube_metadata_fluentd
</filter>
<filter **>
@type record_transformer
@id filter_fluentd_stream_transformer
<record>
stream_name ${tag_parts[3]}
</record>
</filter>
<match **>
@type relabel
@label @NORMAL
</match>
</label>
<label @containers>
<filter **>
@type kubernetes_metadata
@id filter_kube_metadata
</filter>
<filter **>
@type record_transformer
@id filter_containers_stream_transformer
<record>
stream_name ${tag_parts[3]}
</record>
</filter>
<filter **>
@type concat
key log
multiline_start_regexp /^\S/
separator ""
flush_interval 5
timeout_label @NORMAL
</filter>
<match **>
@type relabel
@label @NORMAL
</match>
</label>
<label @cwagentlogs>
<filter **>
@type kubernetes_metadata
@id filter_kube_metadata_cwagent
</filter>
<filter **>
@type record_transformer
@id filter_cwagent_stream_transformer
<record>
stream_name ${tag_parts[3]}
</record>
</filter>
<filter **>
@type concat
key log
multiline_start_regexp /^\d{4}[-/]\d{1,2}[-/]\d{1,2}/
separator ""
flush_interval 5
timeout_label @NORMAL
</filter>
<match **>
@type relabel
@label @NORMAL
</match>
</label>
<label @NORMAL>
<match **>
@type cloudwatch_logs
@id out_cloudwatch_logs_containers
region "#{ENV.fetch('REGION')}"
log_group_name "/aws/containerinsights/#{ENV.fetch('CLUSTER_NAME')}/application"
log_stream_name_key stream_name
remove_log_stream_name_key true
auto_create_stream true
<buffer>
flush_interval 5
chunk_limit_size 2m
queued_chunks_limit_size 32
retry_forever true
</buffer>
</match>
</label>
systemd.conf: |
<source>
@type systemd
@id in_systemd_kubelet
@label @systemd
filters [{ "_SYSTEMD_UNIT": "kubelet.service" }]
<entry>
field_map {"MESSAGE": "message", "_HOSTNAME": "hostname", "_SYSTEMD_UNIT": "systemd_unit"}
field_map_strict true
</entry>
path /var/log/journal
<storage>
@type local
persistent true
path /var/log/fluentd-journald-kubelet-pos.json
</storage>
read_from_head true
tag kubelet.service
</source>
<source>
@type systemd
@id in_systemd_kubeproxy
@label @systemd
filters [{ "_SYSTEMD_UNIT": "kubeproxy.service" }]
<entry>
field_map {"MESSAGE": "message", "_HOSTNAME": "hostname", "_SYSTEMD_UNIT": "systemd_unit"}
field_map_strict true
</entry>
path /var/log/journal
<storage>
@type local
persistent true
path /var/log/fluentd-journald-kubeproxy-pos.json
</storage>
read_from_head true
tag kubeproxy.service
</source>
<source>
@type systemd
@id in_systemd_docker
@label @systemd
filters [{ "_SYSTEMD_UNIT": "docker.service" }]
<entry>
field_map {"MESSAGE": "message", "_HOSTNAME": "hostname", "_SYSTEMD_UNIT": "systemd_unit"}
field_map_strict true
</entry>
path /var/log/journal
<storage>
@type local
persistent true
path /var/log/fluentd-journald-docker-pos.json
</storage>
read_from_head true
tag docker.service
</source>
<label @systemd>
<filter **>
@type kubernetes_metadata
@id filter_kube_metadata_systemd
</filter>
<filter **>
@type record_transformer
@id filter_systemd_stream_transformer
<record>
stream_name ${tag}-${record["hostname"]}
</record>
</filter>
<match **>
@type cloudwatch_logs
@id out_cloudwatch_logs_systemd
region "#{ENV.fetch('REGION')}"
log_group_name "/aws/containerinsights/#{ENV.fetch('CLUSTER_NAME')}/dataplane"
log_stream_name_key stream_name
auto_create_stream true
remove_log_stream_name_key true
<buffer>
flush_interval 5
chunk_limit_size 2m
queued_chunks_limit_size 32
retry_forever true
</buffer>
</match>
</label>
host.conf: |
<source>
@type tail
@id in_tail_dmesg
@label @hostlogs
path /var/log/dmesg
pos_file /var/log/dmesg.log.pos
tag host.dmesg
read_from_head true
<parse>
@type syslog
</parse>
</source>
<source>
@type tail
@id in_tail_secure
@label @hostlogs
path /var/log/secure
pos_file /var/log/secure.log.pos
tag host.secure
read_from_head true
<parse>
@type syslog
</parse>
</source>
<source>
@type tail
@id in_tail_messages
@label @hostlogs
path /var/log/messages
pos_file /var/log/messages.log.pos
tag host.messages
read_from_head true
<parse>
@type syslog
</parse>
</source>
<label @hostlogs>
<filter **>
@type kubernetes_metadata
@id filter_kube_metadata_host
</filter>
<filter **>
@type record_transformer
@id filter_containers_stream_transformer_host
<record>
stream_name ${tag}-${record["host"]}
</record>
</filter>
<match host.**>
@type cloudwatch_logs
@id out_cloudwatch_logs_host_logs
region "#{ENV.fetch('REGION')}"
log_group_name "/aws/containerinsights/#{ENV.fetch('CLUSTER_NAME')}/host"
log_stream_name_key stream_name
remove_log_stream_name_key true
auto_create_stream true
<buffer>
flush_interval 5
chunk_limit_size 2m
queued_chunks_limit_size 32
retry_forever true
</buffer>
</match>
</label>
---
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
name: fluentd-cloudwatch
namespace: amazon-cloudwatch
labels:
k8s-app: fluentd-cloudwatch
spec:
template:
metadata:
labels:
k8s-app: fluentd-cloudwatch
annotations:
configHash: 8915de4cf9c3551a8dc74c0137a3e83569d28c71044b0359c2578d2e0461825
spec:
serviceAccountName: fluentd
terminationGracePeriodSeconds: 30
# Because the image's entrypoint requires to write on /fluentd/etc but we mount configmap there which is read-only,
# this initContainers workaround or other is needed.
# See https://github.com/fluent/fluentd-kubernetes-daemonset/issues/90
initContainers:
- name: copy-fluentd-config
image: busybox
command: ['sh', '-c', 'cp /config-volume/..data/* /fluentd/etc']
volumeMounts:
- name: config-volume
mountPath: /config-volume
- name: fluentdconf
mountPath: /fluentd/etc
- name: update-log-driver
image: busybox
command: ['sh','-c','']
containers:
- name: fluentd-cloudwatch
image: fluent/fluentd-kubernetes-daemonset:v1.3.3-debian-cloudwatch-1.4
env:
- name: REGION
valueFrom:
configMapKeyRef:
name: cluster-info
key: logs.region
- name: CLUSTER_NAME
valueFrom:
configMapKeyRef:
name: cluster-info
key: cluster.name
- name: CI_VERSION
value: "k8s/1.0.0"
resources:
limits:
memory: 200Mi
requests:
cpu: 100m
memory: 200Mi
volumeMounts:
- name: config-volume
mountPath: /config-volume
- name: fluentdconf
mountPath: /fluentd/etc
- name: varlog
mountPath: /var/log
- name: varlibdockercontainers
mountPath: /var/lib/docker/containers
readOnly: true
- name: runlogjournal
mountPath: /run/log/journal
readOnly: true
- name: dmesg
mountPath: /var/log/dmesg
readOnly: true
volumes:
- name: config-volume
configMap:
name: fluentd-config
- name: fluentdconf
emptyDir: {}
- name: varlog
hostPath:
path: /var/log
- name: varlibdockercontainers
hostPath:
path: /var/lib/docker/containers
- name: runlogjournal
hostPath:
path: /run/log/journal
- name: dmesg
hostPath:
path: /var/log/dmesg

View File

@ -0,0 +1,21 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: containerHealth-log-reader
rules:
- apiGroups: [""]
resources: ["pods/log", "events"]
verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: containerHealth-read-logs-global
roleRef:
kind: ClusterRole
name: containerHealth-log-reader
apiGroup: rbac.authorization.k8s.io
subjects:
- kind: User
name: clusterUser
apiGroup: rbac.authorization.k8s.io