| { |
| "dag_id": "ecommerce_etl_pipeline", |
| "description": "Daily ETL pipeline for e-commerce data warehouse", |
| "schedule_interval": "0 2 * * *", |
| "start_date": "2025-01-01", |
| "catchup": false, |
| "tags": ["etl", "ecommerce", "daily"], |
| "default_args": { |
| "owner": "data_engineering", |
| "retries": 3, |
| "retry_delay_minutes": 5, |
| "email_on_failure": true |
| }, |
| "tasks": [ |
| { |
| "task_id": "extract_customers", |
| "operator": "PythonOperator", |
| "description": "Extract customer data from source database", |
| "upstream_dependencies": [], |
| "downstream_dependencies": ["transform_customers"], |
| "source": "postgres://source_db/customers", |
| "target": "s3://data-lake/raw/customers/" |
| }, |
| { |
| "task_id": "extract_orders", |
| "operator": "PythonOperator", |
| "description": "Extract orders data from source database", |
| "upstream_dependencies": [], |
| "downstream_dependencies": ["transform_orders"], |
| "source": "postgres://source_db/orders", |
| "target": "s3://data-lake/raw/orders/" |
| }, |
| { |
| "task_id": "extract_products", |
| "operator": "PythonOperator", |
| "description": "Extract products data from source database", |
| "upstream_dependencies": [], |
| "downstream_dependencies": ["transform_products"], |
| "source": "postgres://source_db/products", |
| "target": "s3://data-lake/raw/products/" |
| }, |
| { |
| "task_id": "extract_order_items", |
| "operator": "PythonOperator", |
| "description": "Extract order items data from source database", |
| "upstream_dependencies": [], |
| "downstream_dependencies": ["transform_order_items"], |
| "source": "postgres://source_db/order_items", |
| "target": "s3://data-lake/raw/order_items/" |
| }, |
| { |
| "task_id": "transform_customers", |
| "operator": "SparkSubmitOperator", |
| "description": "Clean and transform customer data", |
| "upstream_dependencies": ["extract_customers"], |
| "downstream_dependencies": ["load_dim_customers"], |
| "source": "s3://data-lake/raw/customers/", |
| "target": "s3://data-lake/transformed/customers/" |
| }, |
| { |
| "task_id": "transform_orders", |
| "operator": "SparkSubmitOperator", |
| "description": "Clean and transform orders data", |
| "upstream_dependencies": ["extract_orders"], |
| "downstream_dependencies": ["load_fct_orders"], |
| "source": "s3://data-lake/raw/orders/", |
| "target": "s3://data-lake/transformed/orders/" |
| }, |
| { |
| "task_id": "transform_products", |
| "operator": "SparkSubmitOperator", |
| "description": "Clean and transform products data", |
| "upstream_dependencies": ["extract_products"], |
| "downstream_dependencies": ["load_dim_products"], |
| "source": "s3://data-lake/raw/products/", |
| "target": "s3://data-lake/transformed/products/" |
| }, |
| { |
| "task_id": "transform_order_items", |
| "operator": "SparkSubmitOperator", |
| "description": "Clean and transform order items data", |
| "upstream_dependencies": ["extract_order_items"], |
| "downstream_dependencies": ["load_fct_orders"], |
| "source": "s3://data-lake/raw/order_items/", |
| "target": "s3://data-lake/transformed/order_items/" |
| }, |
| { |
| "task_id": "load_dim_customers", |
| "operator": "SnowflakeOperator", |
| "description": "Load customer dimension to Snowflake", |
| "upstream_dependencies": ["transform_customers"], |
| "downstream_dependencies": ["build_customer_metrics"], |
| "source": "s3://data-lake/transformed/customers/", |
| "target": "snowflake://warehouse/analytics.dim_customers" |
| }, |
| { |
| "task_id": "load_dim_products", |
| "operator": "SnowflakeOperator", |
| "description": "Load product dimension to Snowflake", |
| "upstream_dependencies": ["transform_products"], |
| "downstream_dependencies": ["build_sales_report"], |
| "source": "s3://data-lake/transformed/products/", |
| "target": "snowflake://warehouse/analytics.dim_products" |
| }, |
| { |
| "task_id": "load_fct_orders", |
| "operator": "SnowflakeOperator", |
| "description": "Load orders fact table to Snowflake", |
| "upstream_dependencies": ["transform_orders", "transform_order_items"], |
| "downstream_dependencies": ["build_customer_metrics", "build_sales_report"], |
| "source": ["s3://data-lake/transformed/orders/", "s3://data-lake/transformed/order_items/"], |
| "target": "snowflake://warehouse/analytics.fct_orders" |
| }, |
| { |
| "task_id": "build_customer_metrics", |
| "operator": "SnowflakeOperator", |
| "description": "Calculate customer lifetime value and metrics", |
| "upstream_dependencies": ["load_dim_customers", "load_fct_orders"], |
| "downstream_dependencies": ["publish_to_bi"], |
| "source": ["analytics.dim_customers", "analytics.fct_orders"], |
| "target": "snowflake://warehouse/analytics.rpt_customer_metrics" |
| }, |
| { |
| "task_id": "build_sales_report", |
| "operator": "SnowflakeOperator", |
| "description": "Build daily sales report", |
| "upstream_dependencies": ["load_dim_products", "load_fct_orders"], |
| "downstream_dependencies": ["publish_to_bi"], |
| "source": ["analytics.dim_products", "analytics.fct_orders"], |
| "target": "snowflake://warehouse/analytics.rpt_daily_sales" |
| }, |
| { |
| "task_id": "publish_to_bi", |
| "operator": "PythonOperator", |
| "description": "Publish reports to BI tool", |
| "upstream_dependencies": ["build_customer_metrics", "build_sales_report"], |
| "downstream_dependencies": ["notify_stakeholders"], |
| "source": ["analytics.rpt_customer_metrics", "analytics.rpt_daily_sales"], |
| "target": "tableau://server/ecommerce_dashboard" |
| }, |
| { |
| "task_id": "notify_stakeholders", |
| "operator": "EmailOperator", |
| "description": "Send completion notification", |
| "upstream_dependencies": ["publish_to_bi"], |
| "downstream_dependencies": [] |
| } |
| ], |
| "notes": "Sample Airflow DAG representing a complete ETL pipeline with extract, transform, load, and reporting stages." |
| } |
|
|