Carlos Aguni

Highly motivated self-taught IT analyst. Always learning and ready to explore new skills. An eternal apprentice.


Data Quality

29 Sep 2022 »

WhyLogs

https://medium.com/whylabs/data-quality-monitoring-in-apache-airflow-with-whylogs-f5ec71b3db05

from whylogs.core.constraints.factories import greater_than_number, mean_between_range
from whylogs_provider.operators.whylogs import (
   WhylogsConstraintsOperator,
   WhylogsSummaryDriftOperator,
)
 
# Data-quality tasks built from the whylogs Airflow provider operators.
# The three constraint checks and the drift report's target all point at
# the same profile file, "data/profile.bin".
# NOTE(review): no DAG is shown in this snippet; presumably these operators
# are meant to be instantiated inside an Airflow DAG context -- confirm
# against the linked article before running.

# Constraint on column "a" built with greater_than_number(number=0).
greater_than_check_a = WhylogsConstraintsOperator(
    task_id="greater_than_check_a",
    profile_path="data/profile.bin",
    constraint=greater_than_number(column_name="a", number=0),
)

# Same constraint factory, applied to column "b".
greater_than_check_b = WhylogsConstraintsOperator(
    task_id="greater_than_check_b",
    profile_path="data/profile.bin",
    constraint=greater_than_number(column_name="b", number=0),
)

# Constraint on column "b" built with mean_between_range over
# lower=0.0 .. upper=125.1261236210.
# NOTE(review): break_pipeline=False presumably lets the run continue when
# this constraint fails -- confirm against the provider documentation.
avg_between_b = WhylogsConstraintsOperator(
    task_id="avg_between_b",
    profile_path="data/profile.bin",
    break_pipeline=False,
    constraint=mean_between_range(column_name="b", lower=0.0, upper=125.1261236210),
)

# Summary drift task: takes a target profile and a reference profile, with
# the report path set to "data/Profile.html".
summary_drift = WhylogsSummaryDriftOperator(
    task_id="drift_report",
    target_profile_path="data/profile.bin",
    reference_profile_path="data/ref_profile.bin",
    reader="local",
    write_report_path="data/Profile.html",
)