Add k8s config for adding Fluentd and piping logs to CloudWatch.

With this configuration, all Kubernetes logs within the ATAT cluster
will be sent to AWS CloudWatch.

Note that this requires applying an additional IAM policy to the worker
nodes' role.
commit 05c84877dd (parent bbd0ffe1a9)
dandds, 2019-10-11 12:41:55 -04:00
4 changed files with 492 additions and 0 deletions


@@ -30,6 +30,50 @@ envsubst < deploy/aws/storage-class.yml | kubectl apply -f -
When applying configuration changes, be careful not to overwrite the storage class configuration without the environment variable substituted.
#### Fluentd Configuration
For the Fluentd/CloudWatch logging integration to work, you will need to add an additional policy to the worker nodes' role. What follows is adapted from the [EKS Workshop](https://eksworkshop.com/logging/prereqs/).
If you used eksctl to provision the EKS cluster, there will be a CloudFormation stack associated with the cluster. The node instances within the cluster have an associated IAM role that defines their permissions. You need the name of that role. To get it using the AWS CLI, run:
```
export ROLE_NAME=$(aws --profile=dds --region us-east-2 cloudformation describe-stacks --stack-name eksctl-atat-nodegroup-standard-workers | jq -r '.Stacks[].Outputs[] | select(.OutputKey=="InstanceRoleARN") | .OutputValue' | cut -f2 -d/)
```
(This assumes that you have [`jq`](https://stedolan.github.io/jq/) available to parse the JSON response.)
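If `jq` is not available, the AWS CLI's built-in JMESPath filtering can do the same extraction. A minimal sketch, using the same stack, profile, and region as above:
```
export ROLE_NAME=$(aws --profile=dds --region us-east-2 cloudformation describe-stacks \
  --stack-name eksctl-atat-nodegroup-standard-workers \
  --query "Stacks[].Outputs[?OutputKey=='InstanceRoleARN'].OutputValue" \
  --output text | cut -f2 -d/)
```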
Run `echo $ROLE_NAME` to check that the previous command worked.
Create a file called `k8s-logs-policy.json` and add the following content:
```
{
"Version": "2012-10-17",
"Statement": [
{
"Action": [
"logs:DescribeLogGroups",
"logs:DescribeLogStreams",
"logs:CreateLogGroup",
"logs:CreateLogStream",
"logs:PutLogEvents"
],
"Resource": "*",
"Effect": "Allow"
}
]
}
```
This is the new policy that allows the nodes to send logs to CloudWatch. To apply it to the role, run the following with the AWS CLI:
```
aws iam put-role-policy --role-name $ROLE_NAME --policy-name Logs-Policy-For-Worker --policy-document file://./k8s-logs-policy.json
```
(This command assumes you are executing it in the same directory as the policy JSON file; adjust the path as needed.)
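To confirm the policy is attached, you can read it back; a quick check using the same role and policy names as above:
```
aws iam get-role-policy --role-name $ROLE_NAME --policy-name Logs-Policy-For-Worker
```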
### Apply the config to an Azure cluster
To apply the configuration to a new cluster, run:


@@ -0,0 +1,8 @@
apiVersion: v1
data:
cluster.name: atat
logs.region: us-east-2
kind: ConfigMap
metadata:
name: cluster-info
namespace: amazon-cloudwatch
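
The Fluentd DaemonSet below reads `cluster.name` and `logs.region` from this ConfigMap to populate its `CLUSTER_NAME` and `REGION` environment variables. For a different cluster or region, the same ConfigMap can be generated imperatively; a sketch with the values shown above (swap in your own as needed):
```
kubectl create configmap cluster-info \
  --from-literal=cluster.name=atat \
  --from-literal=logs.region=us-east-2 \
  --namespace amazon-cloudwatch \
  --dry-run -o yaml | kubectl apply -f -
```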


@@ -0,0 +1,7 @@
# create amazon-cloudwatch namespace
apiVersion: v1
kind: Namespace
metadata:
name: amazon-cloudwatch
labels:
name: amazon-cloudwatch
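
Because both the ConfigMap above and the Fluentd resources below live in the `amazon-cloudwatch` namespace, this Namespace manifest must be applied first. A sketch of the apply order; only `deploy/aws/fluentd.yaml` is named in this commit, so the other two filenames are assumptions:
```
kubectl apply -f deploy/aws/cloudwatch-namespace.yaml  # assumed filename
kubectl apply -f deploy/aws/cluster-info.yaml          # assumed filename
kubectl apply -f deploy/aws/fluentd.yaml
```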

deploy/aws/fluentd.yaml (new file, 433 lines)

@@ -0,0 +1,433 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: fluentd
namespace: amazon-cloudwatch
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
name: fluentd-role
rules:
- apiGroups: [""]
resources:
- namespaces
- pods
- pods/logs
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: fluentd-role-binding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: fluentd-role
subjects:
- kind: ServiceAccount
name: fluentd
namespace: amazon-cloudwatch
---
apiVersion: v1
kind: ConfigMap
metadata:
name: fluentd-config
namespace: amazon-cloudwatch
labels:
k8s-app: fluentd-cloudwatch
data:
fluent.conf: |
@include containers.conf
@include systemd.conf
@include host.conf
<match fluent.**>
@type null
</match>
containers.conf: |
<source>
@type tail
@id in_tail_container_logs
@label @containers
path /var/log/containers/*.log
exclude_path ["/var/log/containers/cloudwatch-agent*", "/var/log/containers/fluentd*"]
pos_file /var/log/fluentd-containers.log.pos
tag *
read_from_head true
<parse>
@type json
time_format %Y-%m-%dT%H:%M:%S.%NZ
</parse>
</source>
<source>
@type tail
@id in_tail_cwagent_logs
@label @cwagentlogs
path /var/log/containers/cloudwatch-agent*
pos_file /var/log/cloudwatch-agent.log.pos
tag *
read_from_head true
<parse>
@type json
time_format %Y-%m-%dT%H:%M:%S.%NZ
</parse>
</source>
<source>
@type tail
@id in_tail_fluentd_logs
@label @fluentdlogs
path /var/log/containers/fluentd*
pos_file /var/log/fluentd.log.pos
tag *
read_from_head true
<parse>
@type json
time_format %Y-%m-%dT%H:%M:%S.%NZ
</parse>
</source>
<label @fluentdlogs>
<filter **>
@type kubernetes_metadata
@id filter_kube_metadata_fluentd
</filter>
<filter **>
@type record_transformer
@id filter_fluentd_stream_transformer
<record>
stream_name ${tag_parts[3]}
</record>
</filter>
<match **>
@type relabel
@label @NORMAL
</match>
</label>
<label @containers>
<filter **>
@type kubernetes_metadata
@id filter_kube_metadata
</filter>
<filter **>
@type record_transformer
@id filter_containers_stream_transformer
<record>
stream_name ${tag_parts[3]}
</record>
</filter>
<filter **>
@type concat
key log
multiline_start_regexp /^\S/
separator ""
flush_interval 5
timeout_label @NORMAL
</filter>
<match **>
@type relabel
@label @NORMAL
</match>
</label>
<label @cwagentlogs>
<filter **>
@type kubernetes_metadata
@id filter_kube_metadata_cwagent
</filter>
<filter **>
@type record_transformer
@id filter_cwagent_stream_transformer
<record>
stream_name ${tag_parts[3]}
</record>
</filter>
<filter **>
@type concat
key log
multiline_start_regexp /^\d{4}[-/]\d{1,2}[-/]\d{1,2}/
separator ""
flush_interval 5
timeout_label @NORMAL
</filter>
<match **>
@type relabel
@label @NORMAL
</match>
</label>
<label @NORMAL>
<match **>
@type cloudwatch_logs
@id out_cloudwatch_logs_containers
region "#{ENV.fetch('REGION')}"
log_group_name "/aws/containerinsights/#{ENV.fetch('CLUSTER_NAME')}/application"
log_stream_name_key stream_name
remove_log_stream_name_key true
auto_create_stream true
<buffer>
flush_interval 5
chunk_limit_size 2m
queued_chunks_limit_size 32
retry_forever true
</buffer>
</match>
</label>
systemd.conf: |
<source>
@type systemd
@id in_systemd_kubelet
@label @systemd
filters [{ "_SYSTEMD_UNIT": "kubelet.service" }]
<entry>
field_map {"MESSAGE": "message", "_HOSTNAME": "hostname", "_SYSTEMD_UNIT": "systemd_unit"}
field_map_strict true
</entry>
path /var/log/journal
<storage>
@type local
persistent true
path /var/log/fluentd-journald-kubelet-pos.json
</storage>
read_from_head true
tag kubelet.service
</source>
<source>
@type systemd
@id in_systemd_kubeproxy
@label @systemd
filters [{ "_SYSTEMD_UNIT": "kubeproxy.service" }]
<entry>
field_map {"MESSAGE": "message", "_HOSTNAME": "hostname", "_SYSTEMD_UNIT": "systemd_unit"}
field_map_strict true
</entry>
path /var/log/journal
<storage>
@type local
persistent true
path /var/log/fluentd-journald-kubeproxy-pos.json
</storage>
read_from_head true
tag kubeproxy.service
</source>
<source>
@type systemd
@id in_systemd_docker
@label @systemd
filters [{ "_SYSTEMD_UNIT": "docker.service" }]
<entry>
field_map {"MESSAGE": "message", "_HOSTNAME": "hostname", "_SYSTEMD_UNIT": "systemd_unit"}
field_map_strict true
</entry>
path /var/log/journal
<storage>
@type local
persistent true
path /var/log/fluentd-journald-docker-pos.json
</storage>
read_from_head true
tag docker.service
</source>
<label @systemd>
<filter **>
@type kubernetes_metadata
@id filter_kube_metadata_systemd
</filter>
<filter **>
@type record_transformer
@id filter_systemd_stream_transformer
<record>
stream_name ${tag}-${record["hostname"]}
</record>
</filter>
<match **>
@type cloudwatch_logs
@id out_cloudwatch_logs_systemd
region "#{ENV.fetch('REGION')}"
log_group_name "/aws/containerinsights/#{ENV.fetch('CLUSTER_NAME')}/dataplane"
log_stream_name_key stream_name
auto_create_stream true
remove_log_stream_name_key true
<buffer>
flush_interval 5
chunk_limit_size 2m
queued_chunks_limit_size 32
retry_forever true
</buffer>
</match>
</label>
host.conf: |
<source>
@type tail
@id in_tail_dmesg
@label @hostlogs
path /var/log/dmesg
pos_file /var/log/dmesg.log.pos
tag host.dmesg
read_from_head true
<parse>
@type syslog
</parse>
</source>
<source>
@type tail
@id in_tail_secure
@label @hostlogs
path /var/log/secure
pos_file /var/log/secure.log.pos
tag host.secure
read_from_head true
<parse>
@type syslog
</parse>
</source>
<source>
@type tail
@id in_tail_messages
@label @hostlogs
path /var/log/messages
pos_file /var/log/messages.log.pos
tag host.messages
read_from_head true
<parse>
@type syslog
</parse>
</source>
<label @hostlogs>
<filter **>
@type kubernetes_metadata
@id filter_kube_metadata_host
</filter>
<filter **>
@type record_transformer
@id filter_containers_stream_transformer_host
<record>
stream_name ${tag}-${record["host"]}
</record>
</filter>
<match host.**>
@type cloudwatch_logs
@id out_cloudwatch_logs_host_logs
region "#{ENV.fetch('REGION')}"
log_group_name "/aws/containerinsights/#{ENV.fetch('CLUSTER_NAME')}/host"
log_stream_name_key stream_name
remove_log_stream_name_key true
auto_create_stream true
<buffer>
flush_interval 5
chunk_limit_size 2m
queued_chunks_limit_size 32
retry_forever true
</buffer>
</match>
</label>
---
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
name: fluentd-cloudwatch
namespace: amazon-cloudwatch
labels:
k8s-app: fluentd-cloudwatch
spec:
template:
metadata:
labels:
k8s-app: fluentd-cloudwatch
annotations:
configHash: 8915de4cf9c3551a8dc74c0137a3e83569d28c71044b0359c2578d2e0461825
spec:
serviceAccountName: fluentd
terminationGracePeriodSeconds: 30
# The image's entrypoint needs to write to /fluentd/etc, but the ConfigMap is mounted
# there read-only, so this initContainer workaround (or similar) is needed.
# See https://github.com/fluent/fluentd-kubernetes-daemonset/issues/90
initContainers:
- name: copy-fluentd-config
image: busybox
command: ['sh', '-c', 'cp /config-volume/..data/* /fluentd/etc']
volumeMounts:
- name: config-volume
mountPath: /config-volume
- name: fluentdconf
mountPath: /fluentd/etc
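# No-op init container: the empty `sh -c` command exits immediately.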
- name: update-log-driver
image: busybox
command: ['sh','-c','']
containers:
- name: fluentd-cloudwatch
image: fluent/fluentd-kubernetes-daemonset:v1.3.3-debian-cloudwatch-1.4
env:
- name: REGION
valueFrom:
configMapKeyRef:
name: cluster-info
key: logs.region
- name: CLUSTER_NAME
valueFrom:
configMapKeyRef:
name: cluster-info
key: cluster.name
- name: CI_VERSION
value: "k8s/1.0.0"
resources:
limits:
memory: 200Mi
requests:
cpu: 100m
memory: 200Mi
volumeMounts:
- name: config-volume
mountPath: /config-volume
- name: fluentdconf
mountPath: /fluentd/etc
- name: varlog
mountPath: /var/log
- name: varlibdockercontainers
mountPath: /var/lib/docker/containers
readOnly: true
- name: runlogjournal
mountPath: /run/log/journal
readOnly: true
- name: dmesg
mountPath: /var/log/dmesg
readOnly: true
volumes:
- name: config-volume
configMap:
name: fluentd-config
- name: fluentdconf
emptyDir: {}
- name: varlog
hostPath:
path: /var/log
- name: varlibdockercontainers
hostPath:
path: /var/lib/docker/containers
- name: runlogjournal
hostPath:
path: /run/log/journal
- name: dmesg
hostPath:
path: /var/log/dmesg
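
Once everything is applied, a quick end-to-end check. Both commands use standard tooling; the log group prefix follows the `/aws/containerinsights/<cluster>/...` pattern in the config above:
```
# Fluentd should be running as one pod per worker node
kubectl get pods -n amazon-cloudwatch -l k8s-app=fluentd-cloudwatch

# The application, dataplane, and host log groups should appear in CloudWatch
aws --region us-east-2 logs describe-log-groups \
  --log-group-name-prefix /aws/containerinsights/atat/
```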