WhyLogs
https://medium.com/whylabs/data-quality-monitoring-in-apache-airflow-with-whylogs-f5ec71b3db05
from whylogs.core.constraints.factories import greater_than_number, mean_between_range
from whylogs_provider.operators.whylogs import (
WhylogsConstraintsOperator,
WhylogsSummaryDriftOperator,
)
# Constraint task for column "a": builds a greater-than-0 constraint and
# hands it to the operator together with the profile stored at
# data/profile.bin.
# NOTE(review): presumably the task fails when the constraint is not met --
# confirm against the provider's documentation.
greater_than_check_a = WhylogsConstraintsOperator(
    task_id="greater_than_check_a",
    profile_path="data/profile.bin",
    constraint=greater_than_number(
        column_name="a",
        number=0,
    ),
)
# Constraint task for column "b": builds a greater-than-0 constraint and
# hands it to the operator together with the profile stored at
# data/profile.bin.
# NOTE(review): presumably the task fails when the constraint is not met --
# confirm against the provider's documentation.
greater_than_check_b = WhylogsConstraintsOperator(
    task_id="greater_than_check_b",
    profile_path="data/profile.bin",
    constraint=greater_than_number(
        column_name="b",
        number=0,
    ),
)
# Constraint task for column "b": builds a mean-between-range constraint
# with bounds 0.0 and 125.1261236210 and passes it to the operator along
# with the profile at data/profile.bin.
# NOTE(review): break_pipeline=False presumably lets downstream tasks
# continue when the constraint is violated -- confirm against the provider.
avg_between_b = WhylogsConstraintsOperator(
    task_id="avg_between_b",
    profile_path="data/profile.bin",
    break_pipeline=False,
    constraint=mean_between_range(
        column_name="b",
        lower=0.0,
        upper=125.1261236210,
    ),
)
# Summary drift task: receives a target profile path, a reference profile
# path, and a destination path for the report.
# NOTE(review): reader="local" presumably selects a local-filesystem profile
# reader, and the report is presumably rendered as HTML at
# data/Profile.html -- confirm against the provider.
summary_drift = WhylogsSummaryDriftOperator(
    task_id="drift_report",
    target_profile_path="data/profile.bin",
    reference_profile_path="data/ref_profile.bin",
    reader="local",
    write_report_path="data/Profile.html",
)