Carlos Aguni

Highly motivated self-taught IT analyst. Always learning and ready to explore new skills. An eternal apprentice.


FluentD multiline study

27 Jul 2022 »

http://work.haufegroup.io/fluentd-log-parsing/

sample

2015-10-15 08:19:05,190 [testThread] INFO testClass - Queue: update.testEntity; method: updateTestEntity; Object: testEntity; Key: 154696614; MessageID: ID:test1-37782-1444827636952-1:1:2:25:1; CorrelationID: f583ed1c-5352-4916-8252-47298732516e; started processing
2015-10-15 06:44:01,727 [ ajp-apr-127.0.0.1-8009-exec-2] LogInterceptor INFO user-agent: check_http/v2.1.1 (monitoring-plugins 2.1.1)
connection: close
host: test.testing.com
content-length: 0
X-Forwarded-For: 8.8.8.8
2015-10-15 08:21:04,716 [ ttt-grp-127.0.0.1-8119-test-11] LogInterceptor INFO HTTP/1.1 200 OK
2015-10-15 08:21:04,717 flush
2015-10-15 08:19:05.190000000 -0400 log.unprocessed: 
{"time":"2015-10-15 08:19:05,190",
"message":"[testThread] INFO testClass - Queue: update.testEntity; method: updateTestEntity; Object: testEntity; Key: 154696614; MessageID: ID:test1-37782-1444827636952-1:1:2:25:1; CorrelationID: f583ed1c-5352-4916-8252-47298732516e; started processing"
}
2015-10-15 06:44:01.727000000 -0400 log.unprocessed: 
{"time":"2015-10-15 06:44:01,727",
"message":"[ ajp-apr-127.0.0.1-8009-exec-2] LogInterceptor INFO user-agent: check_http/v2.1.1 (monitoring-plugins 2.1.1)\nconnection: close\nhost: test.testing.com\ncontent-length: 0\nX-Forwarded-For: 8.8.8.8"}
# Tail /opt/test and emit each parsed event with the tag log.unprocessed.
<source>
    @type tail
    path /opt/test
    tag log.unprocessed
    # Read the file from its beginning rather than only newly appended lines.
    read_from_head true

    <parse>
        @type multiline
        # A line containing a "YYYY-MM-DD HH:MM:SS,mmm" timestamp starts a new event;
        # lines without one are appended to the event in progress.
        format_firstline /\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}:\d{1,2},\d{3}/
        # Capture the timestamp as `time` and everything after the following space
        # (continuation lines included) as `message`.
        format1 /(?<time>\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{1,2}:\d{1,2},\d{3}) (?<message>(.|\s)*)/

        # Use the captured `time` field as the event time, and keep it in the record.
        time_key time
        time_format %Y-%m-%d %H:%M:%S,%L
        keep_time_key true
    </parse>
</source>

# Re-tag events according to the content of their `message` field.
<match log.unprocessed.**>
    @type rewrite_tag_filter
    # Messages mentioning CorrelationID are re-emitted with the tag `correlation`.
    <rule>
        key message
        pattern .*CorrelationID.*
        tag correlation
    </rule>
    # Fallback: every other message is re-emitted with the tag `any`.
    <rule>
        key message
        pattern .*
        tag any
    </rule>
</match>


# Split the `message` of events tagged `correlation` into individual fields.
# NOTE(review): the sample `correlation` output carries the processing time
# (2022-07-28) instead of the log line's own time; `reserve_time true` would
# presumably keep the original event time -- confirm.
<filter correlation>
    @type parser
    key_name message
    # reserve_data is left disabled, so the record is replaced by the parsed
    # fields only (the sample output has no `time` or `message` key).
    #reserve_data yes
    <parse>
        @type regexp
        # Named captures: method, object, objectkey, messageID, correlationID;
        # each value runs up to the next ';'.
        expression / * (.*method:) (?<method>[^;]*) *(.*Object:) (?<object>[^;]*) *(.*Key:) (?<objectkey>[^;]*) *(.*MessageID:) (?<messageID>[^;]*) *(.*CorrelationID:) (?<correlationID>[^;]*).*/
    </parse>
</filter>

# Print events to stdout. No tag pattern is given; the sample output shows
# events tagged both `correlation` and `any` arriving here.
<match>
    @type stdout
</match>
#<match correlation>
#    @type stdout
#</match>

#<match file.nginx>
#  @type copy
#  <store>
#    @type stdout
#    <format>
#      @type json
#    </format>
#    #<inject>
#    #  time_key timestamp
#    #  time_format %Y%m%dT%H%M%S%z
#    #  localtime false
#    #</inject>
#  </store>
#  #<store>
#  #  @type file
#  #  path /var/log/fluent/nginx
#  #  append true
#  #  #compress gzip
#  #  <format>
#  #    localtime false
#  #  </format>
#  #  <buffer time>
#  #    #timekey_wait 10m
#  #    #timekey 86400
#  #    timekey_wait 1s
#  #    timekey 10m
#  #    timekey_use_utc true
#  #    path /var/log/td-agent/nginx-buf000
#  #  </buffer>
#  #  <inject>
#  #    time_format %Y%m%dT%H%M%S%z
#  #    localtime false
#  #  </inject>
#  #</store>
#</match>
2022-07-28 00:37:32.659990770 -0400 correlation: {"method":"updateTestEntity","object":"testEntity","obj
ectkey":"154696614","messageID":"ID:test1-37782-1444827636952-1:1:2:25:1","correlationID":"f583ed1c-5352
-4916-8252-47298732516e"}
2015-10-15 06:44:01.727000000 -0400 any: {"time":"2015-10-15 06:44:01,727","message":"[ ajp-apr-127.0.0.
1-8009-exec-2] LogInterceptor INFO user-agent: check_http/v2.1.1 (monitoring-plugins 2.1.1)\nconnection:
 close\nhost: test.testing.com\ncontent-length: 0\nX-Forwarded-For: 8.8.8.8"}
2015-10-15 08:21:04.716000000 -0400 any: {"time":"2015-10-15 08:21:04,716","message":"[ ttt-grp-127.0.0.
1-8119-test-11] LogInterceptor INFO HTTP/1.1 200 OK"}

hue

[26/Jul/2022 05:55:22 -0700] access INFO 127.0.0.1, 10.48.13.195 user -
[26/Jul/2022 05:55:30 -0700] hive_server INFO Retrying
# Tail /opt/hue.log and emit each parsed event with the tag log.unprocessed.
<source>
    @type tail
    path /opt/hue.log
    tag log.unprocessed
    # Read the file from its beginning rather than only newly appended lines.
    read_from_head true

    <parse>
        @type multiline
        # A new event starts at a line containing "[dd/Mon/yyyy hh:mm:ss <zone>] <component> <level>".
        # The ']' inside the character class is escaped so the pattern does not
        # depend on the regex engine treating a leading ']' as a literal.
        format_firstline /\[\d+\/\w+\/\d{4} \d+:\d+:\d+([^\]]+)\]\s+\w+\s+\w+/
        # Capture the bracketed timestamp as `time`, then `component`, `log_lvl`
        # and the remainder of the event as `message`.
        format1 /\[(?<time>\d+\/\w+\/\d{4} \d+:\d+:\d+([^\]]+))\]\s+(?<component>\w+)\s+(?<log_lvl>\w+) (?<message>.*)/

        # No explicit time settings are active; the sample `any` output shows the
        # captured `time` still being used as the event time and removed from the record.
        #time_key time
        #time_format %Y-%m-%d %H:%M:%S,%L
        #keep_time_key true
    </parse>
</source>

#<match log.unprocessed.**>
#    @type rewrite_tag_filter
#    <rule>
#        key message
#        pattern .*CorrelationID.*
#        tag correlation
#    </rule>
#    <rule>
#        key message
#        pattern .*message.*
#        tag clear
#    </rule>
#</match>
#
#<match clear>
#    @type null
#</match>
#
#<filter correlation>
#    @type parser
#    key_name message
#    format / * (.*method:) (?<method>[^;]*) *(.*Object:) (?<object>[^;]*) *(.*Key:) (?<objectkey>[^;]*) *(.*MessageID:) (?<messageID>[^;]*) *(.*CorrelationID:) (?<correlationID>[^;]*).*/
#    #reserve_data yes
#</filter>
#
#<match correlation>
#    @type stdout
#</match>


# Re-tag events according to the content of their `message` field.
<match log.unprocessed.**>
    @type rewrite_tag_filter
    # Messages that begin with a dotted-quad IP address are re-emitted as `hasip`.
    <rule>
        key message
        pattern ^\d+\.\d+\.\d+\.\d+.*
        tag hasip
    </rule>
    # Fallback: every other message is re-emitted with the tag `any`.
    <rule>
        key message
        pattern .*
        tag any
    </rule>
</match>

# Extract the two IP addresses and the user name from the `message` of
# events tagged `hasip`.
<filter hasip>
    @type parser
    key_name message
    # Keep the existing fields (component, log_lvl, message) alongside the parsed ones.
    reserve_data yes
    # Uses a <parse> section instead of the legacy inline `format` parameter.
    <parse>
        @type regexp
        # Expects "<ip1>, <ip2> <user> ..." and captures ip1, ip2 and user.
        expression /(?<ip1>\d+\.\d+\.\d+\.\d+), (?<ip2>\d+\.\d+\.\d+\.\d+) (?<user>\w+) .*/
    </parse>
</filter>

# Drop the raw `message` field from `hasip` events.
# The pattern was `log.hasip`, which never matched: the rewrite rule emits the
# tag `hasip`. The captured sample output for `hasip` predates this fix and
# therefore still shows the `message` key.
<filter hasip>
    @type record_transformer
    remove_keys message
</filter>

# Print events to stdout. No tag pattern is given; the sample output shows
# events tagged both `hasip` and `any` arriving here.
<match>
    @type stdout
</match>

# NOTE(review): this block likely never receives events. The same pattern is
# already consumed by the rewrite_tag_filter <match> earlier in this config and
# a pattern-less <match> precedes it; Fluentd routes an event to the first
# matching <match>. Confirm before relying on it.
#<match>
<match log.unprocessed.**>
  @type copy
  <store>
    # Print each event to stdout, formatted as JSON.
    @type stdout
    <format>
      @type json
    </format>
  </store>
</match>

#<match file.nginx>
#  @type copy
#  <store>
#    @type stdout
#    <format>
#      @type json
#    </format>
#    #<inject>
#    #  time_key timestamp
#    #  time_format %Y%m%dT%H%M%S%z
#    #  localtime false
#    #</inject>
#  </store>
#  #<store>
#  #  @type file
#  #  path /var/log/fluent/nginx
#  #  append true
#  #  #compress gzip
#  #  <format>
#  #    localtime false
#  #  </format>
#  #  <buffer time>
#  #    #timekey_wait 10m
#  #    #timekey 86400
#  #    timekey_wait 1s
#  #    timekey 10m
#  #    timekey_use_utc true
#  #    path /var/log/td-agent/nginx-buf000
#  #  </buffer>
#  #  <inject>
#  #    time_format %Y%m%dT%H%M%S%z
#  #    localtime false
#  #  </inject>
#  #</store>
#</match>
2022-07-28 00:39:55.700019862 -0400 hasip: {"component":"access","log_lvl":"INFO","message":"127.0.0.1, 
10.48.13.195 user - ","ip1":"127.0.0.1","ip2":"10.48.13.195","user":"user"}
2022-07-26 08:55:30.000000000 -0400 any: {"component":"hive_server","log_lvl":"INFO","message":"Retrying
"}