Carlos Aguni

Highly motivated self-taught IT analyst. Always learning and ready to explore new skills. An eternal apprentice.


FluentD Study

09 May 2022 »

How to produce Prometheus metrics out of Logs using Fluentd

Logging in Kubernetes with EFK Stack

https://www.udemy.com/course/logging-in-kubernetes-with-efk-stack/

https://gitlab.com/nanuchi/efk-course-commands/-/blob/master/fluentd-config.yaml

# ConfigMap that carries the Fluentd forwarder configuration (key: fluentd.conf).
# The labels/annotations show it is managed by the Helm release "fluentd".
kind: ConfigMap
apiVersion: v1
metadata:
  name: fluentd-forwarder-cm
  namespace: default
  labels:
    app.kubernetes.io/component: forwarder
    app.kubernetes.io/instance: fluentd
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: fluentd
    helm.sh/chart: fluentd-1.3.0
  annotations:
    meta.helm.sh/release-name: fluentd
    meta.helm.sh/release-namespace: default
data:
  fluentd.conf: |

    # Ignore fluentd own events
    <match fluent.**>
        @type null
    </match>

    # HTTP input for the liveness and readiness probes
    <source>
        @type http
        port 9880
    </source>

    # Discard the healthcheck events (null output) instead of forwarding them
    <match fluentd.healthcheck>
        @type null
    </match>

    # Get the logs from the containers running in the node.
    # Only files whose name matches *-app*.log are tailed; the `*` in the tag
    # is expanded to the file path (the match blocks below rely on the
    # resulting kubernetes.var.log.containers.<file> tags).
    <source>
      @type tail
      path /var/log/containers/*-app*.log
      pos_file /opt/bitnami/fluentd/logs/buffers/fluentd-docker.pos
      tag kubernetes.*
      read_from_head true
      format json
      time_format %Y-%m-%dT%H:%M:%S.%NZ
    </source>

    # Parse the `log` field of every event as JSON, using its `time` key as
    # the event time and keeping that key in the record.
    # NOTE(review): `reserve_data` is not set, so presumably only the fields
    # parsed out of `log` survive this filter - confirm this is intended.
    <filter **>
      @type parser
      key_name log
      <parse>
        @type multi_format
        <pattern>
          format json
          time_key time
          keep_time_key true
        </pattern>
      </parse>
    </filter>

    # enrich with kubernetes metadata
    <filter kubernetes.**>
        @type kubernetes_metadata
    </filter>


    # Ship java-app container logs to the "java-app-logs" Elasticsearch index,
    # buffered on disk and flushed every 5 seconds.
    <match kubernetes.var.log.containers.**java-app**.log>
      @type elasticsearch
      include_tag_key true
      host "elasticsearch-master.default.svc.cluster.local"
      port "9200"
      index_name "java-app-logs"
      <buffer>
        @type file
        path /opt/bitnami/fluentd/logs/buffers/java-logs.buffer
        flush_thread_count 2
        flush_interval 5s
      </buffer>
    </match>

    # Same as above for node-app container logs ("node-app-logs" index,
    # separate buffer path).
    <match kubernetes.var.log.containers.**node-app**.log>
      @type elasticsearch
      include_tag_key true
      host "elasticsearch-master.default.svc.cluster.local"
      port "9200"
      index_name "node-app-logs"
      <buffer>
        @type file
        path /opt/bitnami/fluentd/logs/buffers/node-logs.buffer
        flush_thread_count 2
        flush_interval 5s
      </buffer>
    </match>

Fluentd Webinar: Best kept secret to unify logging on AWS, Docker, GCP, and more!

https://youtu.be/aeGADcC-hUA

Apache to S3

https://docs.fluentd.org/how-to-guides/apache-to-s3

# Tail the Apache access log, parse each line with the apache2 parser and
# emit the events under the tag s3.apache.access.
<source>
  @type tail
  tag s3.apache.access
  path /var/log/apache2/access_log
  # Position file used by in_tail to remember how far it has read.
  pos_file /var/log/td-agent/apache2.access_log.pos
  <parse>
    @type apache2
  </parse>
</source>
  • buffer
    • file
    • memory
    • file_single

https://docs.fluentd.org/input/tail

read_from_head true
follow_inodes true  # Without this parameter, file rotation causes log duplication.

# Tail weblog.csv from its first line and emit every line unparsed under the
# tag s3.var.secure (picked up by the s3.*.* filter and match).
<source>
  @type tail
  path /opt/td-agent-lab/weblog.csv
  pos_file /var/log/td-agent/weblog7.pos
  read_from_head true
  # Track files by inode so that rotation does not cause duplicated reads.
  follow_inodes true
  # The <parse> section is the only parser definition: lines are kept as-is
  # (parser `none`). Do not also set the legacy `format` parameter here; it
  # contradicts this section.
  <parse>
    @type none
  </parse>
  tag s3.var.secure
</source>

# Enrich every three-part s3.<x>.<y> event (e.g. s3.var.secure,
# s3.apache.access) with the local hostname, its own tag and a fixed region.
<filter s3.*.*>
  @type record_transformer
  <record>
    # Embedded Ruby: resolved to the machine's hostname.
    hostname "#{Socket.gethostname}"
    tag ${tag}
    region us-east-1
  </record>
</filter>

# Upload the enriched events to S3 as JSON objects.
<match s3.*.*>
  @type s3

  # Placeholders: replace with real credentials and bucket name.
  aws_key_id <your aws_key_id>
  aws_sec_key <your aws sec key>
  s3_bucket <your-s3-bucket>
  path td-agent2/logs/%Y/%m/%d/%H/
  store_as json

  # Object key = path + time slice + running index + extension.
  s3_object_key_format %{path}%{time_slice}_%{index}.%{file_extension}

  # NOTE(review): `host` does not look like an out_s3 parameter - confirm
  # whether this line has any effect or can be dropped.
  host lalahost

  <format>
    @type json
  </format>

  <buffer>
    @type file
    path /var/log/td-agent/s3-7
    #timekey 3600  # 1 hour
    # 1-second time slices; presumably a lab value to see uploads quickly
    # (the commented-out line above is the hourly setting).
    timekey 1
    timekey_wait 10
    #timekey_wait 10m
    chunk_limit_size 256m
  </buffer>

  # Legacy-style parameter that formats %{time_slice} with hour resolution.
  # NOTE(review): verify how it combines with the 1-second timekey above.
  time_slice_format %Y%m%d%H
</match>

give the log collector (td-agent) read permissions on the log files

https://groups.google.com/g/fluentd/c/yr7faV9DlU8

chmod og+rx /var/log/httpd
chmod og+r /var/log/messages /var/log/secure /var/log/httpd/*

read nginx logs

raw log

tail /var/log/nginx/access.log

192.168.100.59 - - [08/Jun/2022:00:44:16 -0400] "GET / HTTP/1.1" 200 3141 "-" "curl/7.61.1" "-"
192.168.100.59 - - [08/Jun/2022:00:44:17 -0400] "GET / HTTP/1.1" 200 3141 "-" "curl/7.61.1" "-"
192.168.100.59 - - [08/Jun/2022:00:44:18 -0400] "GET / HTTP/1.1" 200 3141 "-" "curl/7.61.1" "-"
192.168.100.59 - - [08/Jun/2022:00:44:19 -0400] "GET / HTTP/1.1" 200 3141 "-" "curl/7.61.1" "-"
192.168.100.59 - - [08/Jun/2022:00:44:20 -0400] "GET / HTTP/1.1" 200 3141 "-" "curl/7.61.1" "-"
192.168.100.59 - - [08/Jun/2022:00:44:21 -0400] "GET / HTTP/1.1" 200 3141 "-" "curl/7.61.1" "-"
192.168.100.59 - - [08/Jun/2022:00:44:22 -0400] "GET / HTTP/1.1" 200 3141 "-" "curl/7.61.1" "-"
192.168.100.59 - - [08/Jun/2022:00:44:23 -0400] "GET / HTTP/1.1" 200 3141 "-" "curl/7.61.1" "-"
192.168.100.59 - - [08/Jun/2022:00:44:24 -0400] "GET / HTTP/1.1" 200 3141 "-" "curl/7.61.1" "-"
192.168.100.59 - - [08/Jun/2022:00:44:25 -0400] "GET / HTTP/1.1" 200 3141 "-" "curl/7.61.1" "-"
# nginx access-log pipeline: tail -> enrich -> print as JSON on stdout.

# Input: read /var/log/nginx/access.log from the start, following the file
# across rotations, and parse each line with the built-in nginx parser.
<source>
  @type tail
  tag file.nginx
  # Earlier experiment, kept for reference:
  #path /opt/td-agent-lab/weblog.csv
  path /var/log/nginx/access.log
  pos_file /var/log/td-agent/nginx-access.pos4
  follow_inodes true
  read_from_head true
  <parse>
    @type nginx
  </parse>
</source>

# Enrichment: add the local hostname, the event tag and a fixed region.
<filter file.nginx>
  @type record_transformer
  <record>
    region us-east-1
    tag ${tag}
    hostname "#{Socket.gethostname}"
  </record>
</filter>

# Output: `copy` fans events out to every <store>; only the stdout store is
# active, the file store below is disabled.
<match file.nginx>
  @type copy
  <store>
    @type stdout
    <format>
      @type json
    </format>
  </store>
  #<store>
  #  @type file
  #  path /var/log/fluent/nginx
  #  append true
  #  #compress gzip
  #  <format>
  #    localtime false
  #  </format>
  #  <buffer time>
  #    #timekey_wait 10m
  #    #timekey 86400
  #    timekey_wait 1s
  #    timekey 10m
  #    timekey_use_utc true
  #    path /var/log/td-agent/nginx-buf000
  #  </buffer>
  #  <inject>
  #    time_format %Y%m%dT%H%M%S%z
  #    localtime false
  #  </inject>
  #</store>
</match>

/var/log/td-agent/td-agent.conf

{"remote":"192.168.100.59","host":"-","user":"-","method":"GET","path":"/","code":"200","size":"3141","referer":"-","agent":"curl/7.61.1","http_x_forwarded_for":"-","hostname":"automation","tag":"file.nginx","region":"us-east-1"}
{"remote":"192.168.100.59","host":"-","user":"-","method":"GET","path":"/","code":"200","size":"3141","referer":"-","agent":"curl/7.61.1","http_x_forwarded_for":"-","hostname":"automation","tag":"file.nginx","region":"us-east-1"}
{"remote":"192.168.100.59","host":"-","user":"-","method":"GET","path":"/","code":"200","size":"3141","referer":"-","agent":"curl/7.61.1","http_x_forwarded_for":"-","hostname":"automation","tag":"file.nginx","region":"us-east-1"}
{"remote":"192.168.100.59","host":"-","user":"-","method":"GET","path":"/","code":"200","size":"3141","referer":"-","agent":"curl/7.61.1","http_x_forwarded_for":"-","hostname":"automation","tag":"file.nginx","region":"us-east-1"}
{"remote":"192.168.100.59","host":"-","user":"-","method":"GET","path":"/","code":"200","size":"3141","referer":"-","agent":"curl/7.61.1","http_x_forwarded_for":"-","hostname":"automation","tag":"file.nginx","region":"us-east-1"}

/var/log/secure

raw

Jun  7 03:00:04 automation sshd[25859]: Received disconnect from 192.168.100.49 port 54178:11: disconnected by user
Jun  7 03:00:04 automation sshd[25859]: Disconnected from 192.168.100.49 port 54178
Jun  7 03:00:04 automation sshd[25859]: pam_unix(sshd:session): session closed for user root
Jun  7 03:00:05 automation sshd[26074]: Accepted publickey for root from 192.168.100.49 port 54180 ssh2: RSA SHA256:QOFSbWuC6mVyDFwOBzITOMn2oD3ybU1c1HlB+hsD5tQ
Jun  7 03:00:05 automation sshd[26074]: pam_unix(sshd:session): session opened for user root by (uid=0)
Jun  7 03:00:05 automation sshd[26074]: Received disconnect from 192.168.100.49 port 54180:11: disconnected by user
Jun  7 03:00:05 automation sshd[26074]: Disconnected from 192.168.100.49 port 54180
Jun  7 03:00:05 automation sshd[26074]: pam_unix(sshd:session): session closed for user root
Jun  7 23:20:19 automation polkitd[925]: Registered Authentication Agent for unix-process:25184:104509147 (system bus name :1.18877 [/usr/bin/pkttyagent --notify-fd 5 --fallback], object path /org/freedesktop/PolicyKit1/AuthenticationAgent, locale en_US.UTF-8)
Jun  7 23:20:20 automation polkitd[925]: Unregistered Authentication Agent for unix-process:25184:104509147 (system bus name :1.18877, object path /org/freedesktop/PolicyKit1/AuthenticationAgent, locale en_US.UTF-8) (disconnected from bus)
Jun  7 23:22:59 automation polkitd[925]: Registered Authentication Agent for unix-process:26123:104525122 (system bus name :1.18878 [/usr/bin/pkttyagent --notify-fd 5 --fallback], object path /org/freedesktop/PolicyKit1/AuthenticationAgent, locale en_US.UTF-8)
Jun  7 23:23:00 automation polkitd[925]: Unregistered Authentication Agent for unix-process:26123:104525122 (system bus name :1.18878, object path /org/freedesktop/PolicyKit1/AuthenticationAgent, locale en_US.UTF-8) (disconnected from bus)

conf

# Tail /var/log/secure from the start and split each syslog-style line into
# time, hostname, service, service_pid and message.
<source>
    @type tail
    #path /opt/td-agent-lab/weblog.csv
    path /var/log/secure
    pos_file /var/log/td-agent/secure.pos
    read_from_head true
    follow_inodes true
    <parse>
        @type regexp
        # The day of month is matched with \d{1,2}: syslog pads single-digit
        # days with an extra space ("Jun  7") but writes two digits from the
        # 10th onwards ("Jun 17"); a single \d rejects the latter.
        expression /^(?<time>\w+\s+\d{1,2} \d+:\d+:\d+) (?<hostname>[^ ]+) (?<service>[^\[]+)\[(?<service_pid>\d+)\]: (?<message>.*)$/
        #time_key time
        #time_format %Y-%m-%d %H:%M:%S %z
    </parse>
    tag file.secure
</source>

# Enrich file.secure events with the local hostname, the event tag and a
# fixed region.
<filter file.secure>
  @type record_transformer
  <record>
    # NOTE(review): the source regex already captures a `hostname` field;
    # presumably this value replaces it - confirm that is intended.
    hostname "#{Socket.gethostname}"
    tag ${tag}
    region us-east-1
  </record>
</filter>

# Print file.secure events as JSON on stdout. `copy` would fan out to more
# stores, but the file store below is commented out.
<match file.secure>
  @type copy
  <store>
    @type stdout
    <format>
      @type json
    </format>
  </store>
  # Disabled file store. NOTE(review): its paths still mention nginx, so it
  # looks copied from the nginx pipeline - adjust before enabling.
  #<store>
  #  @type file
  #  path /var/log/fluent/nginx
  #  append true
  #  #compress gzip
  #  <format>
  #    localtime false
  #  </format>
  #  <buffer time>
  #    #timekey_wait 10m
  #    #timekey 86400
  #    timekey_wait 1s
  #    timekey 10m
  #    timekey_use_utc true
  #    path /var/log/td-agent/nginx-buf000
  #  </buffer>
  #  <inject>
  #    time_format %Y%m%dT%H%M%S%z
  #    localtime false
  #  </inject>
  #</store>
</match>

parsed

{"hostname":"automation","service":"polkitd","service_pid":"925","message":"Unregistered Authentication Agent for unix-process:13835:105267926 (system bus name :1.19058, object path /org/freedesktop/PolicyKit1/AuthenticationAgent, locale en_US.UTF-8) (disconnected from bus)","tag":"file.secure","region":"us-east-1"}
{"hostname":"automation","service":"polkitd","service_pid":"925","message":"Registered Authentication Agent for unix-process:13948:105269506 (system bus name :1.19059 [/usr/bin/pkttyagent --notify-fd 5 --fallback], object path /org/freedesktop/PolicyKit1/AuthenticationAgent, locale en_US.UTF-8)","tag":"file.secure","region":"us-east-1"}
{"hostname":"automation","service":"polkitd","service_pid":"925","message":"Unregistered Authentication Agent for unix-process:13948:105269506 (system bus name :1.19059, object path /org/freedesktop/PolicyKit1/AuthenticationAgent, locale en_US.UTF-8) (disconnected from bus)","tag":"file.secure","region":"us-east-1"}
{"hostname":"automation","service":"polkitd","service_pid":"925","message":"Registered Authentication Agent for unix-process:14107:105272353 (system bus name :1.19060 [/usr/bin/pkttyagent --notify-fd 5 --fallback], object path /org/freedesktop/PolicyKit1/AuthenticationAgent, locale en_US.UTF-8)","tag":"file.secure","region":"us-east-1"}
{"hostname":"automation","service":"polkitd","service_pid":"925","message":"Unregistered Authentication Agent for unix-process:14107:105272353 (system bus name :1.19060, object path /org/freedesktop/PolicyKit1/AuthenticationAgent, locale en_US.UTF-8) (disconnected from bus)","tag":"file.secure","region":"us-east-1"}

tests2 nginx stream

172.24.206.32 [27/Jun/2022:18:56:07 -0300] TCP 200 0 0 0.000

# nginx stream-log pipeline: tail -> print as JSON on stdout with an injected
# `timestamp` field.

# Input: read /opt/stream_access.log from the start and split each line into
# ip, ds, proto, status_code, num1, num2 and duration.
<source>
  @type tail
  tag file.nginx
  # Earlier experiment, kept for reference:
  #path /opt/td-agent-lab/weblog.csv
  path /opt/stream_access.log
  pos_file /var/log/td-agent/nginx-access.pos23
  follow_inodes true
  read_from_head true
  <parse>
    @type regexp
    expression /^(?<ip>[^ ]+) \[(?<ds>[^]]+)\] (?<proto>\w+) (?<status_code>\w+) (?<num1>\d+) (?<num2>\d+) (?<duration>\d+?\.?\d+)$/
    # The bracketed date (`ds`) becomes the event time and, because of
    # keep_time_key, also stays in the record.
    #time_type string
    time_key ds
    keep_time_key true
    time_format %d/%b/%Y:%H:%M:%S %z
  </parse>
</source>

# Output: only the stdout store is active; the file store is disabled.
<match file.nginx>
  @type copy
  <store>
    @type stdout
    <format>
      @type json
    </format>
    # Adds the event time to each record as `timestamp`.
    # time_type is not set here, so the value is emitted in numeric form (the
    # recorded output shows a float) and time_format is not applied; add
    # `time_type string` to get a formatted timestamp.
    <inject>
      time_key timestamp
      localtime false
      time_format %Y%m%dT%H%M%S%z
    </inject>
  </store>
  #<store>
  #  @type file
  #  path /var/log/fluent/nginx
  #  append true
  #  #compress gzip
  #  <format>
  #    localtime false
  #  </format>
  #  <buffer time>
  #    #timekey_wait 10m
  #    #timekey 86400
  #    timekey_wait 1s
  #    timekey 10m
  #    timekey_use_utc true
  #    path /var/log/td-agent/nginx-buf000
  #  </buffer>
  #  <inject>
  #    time_format %Y%m%dT%H%M%S%z
  #    localtime false
  #  </inject>
  #</store>
</match>

output

{
    "ip":"172.24.206.32",
    "ds":"27/Jun/2022:18:56:07 -0300",
    "proto":"TCP",
    "status_code":"200",
    "num1":"0",
    "num2":"0",
    "duration":"0.000",
    "timestamp":1656366967.0
}