Since you’ve modified the database structure, you will also need to revise the regular population script. There are changes in three areas:
You need to remove all zip code related columns in the customer dimension population, as the customer addresses and shipping zip codes are no longer in the customer dimension
You need to use the surrogate keys from the customer zip code and shipping zip code dimension views. (Shown in bold in the script in Listing 23.6)
You need to move the pa_customer_dim population after the sales_order_fact population because to get the customer zip code you need to go to the customer_zip_code_sk in sales order fact table.
The regular population script in Listing 23.6 implements these three changes.
/******************************************************************/ /* */ /* dw_regular_23.sql */ /* */ /******************************************************************/ USE dw; /* CUSTOMER_DIM POPULATION */ TRUNCATE customer_stg; LOAD DATA INFILE 'customer.csv' INTO TABLE customer_stg FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY ' ' IGNORE 1 LINES ( customer_number , customer_name , customer_street_address , customer_zip_code , customer_city , customer_state , shipping_address , shipping_zip_code , shipping_city , shipping_state ) ; /* SCD 2 ON ADDRESSES */ UPDATE customer_dim a , customer_stg b SET a.expiry_date = SUBDATE(CURRENT_DATE, 1) WHERE a.customer_number = b.customer_number AND ( a.customer_street_address <> b.customer_street_address OR a.shipping_address <> b.shipping_address OR a.shipping_address IS NULL ) AND expiry_date = '9999-12-31' ; INSERT INTO customer_dim SELECT NULL , b.customer_number , b.customer_name , b.customer_street_address , b.shipping_address , CURRENT_DATE , '9999-12-31' FROM customer_dim a , customer_stg b WHERE a.customer_number = b.customer_number AND ( a.customer_street_address <> b.customer_street_address OR a.shipping_address <> b.shipping_address OR a.shipping_address IS NULL ) AND EXISTS ( SELECT * FROM customer_dim x WHERE b.customer_number = x.customer_number AND a.expiry_date = SUBDATE(CURRENT_DATE, 1)) AND NOT EXISTS ( SELECT * FROM customer_dim y WHERE b.customer_number = y.customer_number AND y.expiry_date = '9999-12-31') ; /* END OF SCD 2 */ /* SCD 1 ON NAME */ UPDATE customer_dim a, customer_stg b SET a.customer_name = b.customer_name WHERE a.customer_number = b.customer_number AND a.expiry_date = '9999-12-31' AND a.customer_name <> b.customer_name ; /* ADD NEW CUSTOMER */ INSERT INTO customer_dim SELECT NULL , customer_number , customer_name , customer_street_address , shipping_address , CURRENT_DATE , '9999-12-31' FROM customer_stg WHERE customer_number NOT IN( SELECT a.customer_number FROM customer_dim a , customer_stg b WHERE b.customer_number = a.customer_number ) ; /* END OF CUSTOMER_DIM POPULATION */ /* PRODUCT_DIM POPULATION */ TRUNCATE product_stg ; LOAD DATA INFILE 'product.txt' INTO TABLE product_stg FIELDS TERMINATED BY '' OPTIONALLY ENCLOSED BY '' LINES TERMINATED BY ' ' IGNORE 1 LINES ( product_code , product_name , product_category ) ; /* SCD2 ON PRODUCT NAME AND GROUP */ UPDATE product_dim a , product_stg b SET expiry_date = SUBDATE(CURRENT_DATE, 1) WHERE a.product_code = b.product_code AND ( a.product_name <> b.product_name OR a.product_category <> b.product_category ) AND expiry_date = '9999-12-31' ; INSERT INTO product_dim SELECT NULL , b.product_code , b.product_name , b.product_category , CURRENT_DATE , '9999-12-31' FROM product_dim a , product_stg b WHERE a.product_code = b.product_code AND ( a.product_name <> b.product_name OR a.product_category <> b.product_category ) AND EXISTS ( SELECT * FROM product_dim x WHERE b.product_code = x.product_code AND a.expiry_date = SUBDATE(CURRENT_DATE, 1)) AND NOT EXISTS ( SELECT * FROM product_dim y WHERE b.product_code = y.product_code AND y.expiry_date = '9999-12-31') ; /* END OF SCD 2 */ /* ADD NEW PRODUCT */ INSERT INTO product_dim SELECT NULL , product_code , product_name , product_category , CURRENT_DATE , '9999-12-31' FROM product_stg WHERE product_code NOT IN( SELECT y.product_code FROM product_dim x, product_stg y WHERE x.product_code = y.product_code ) ; /* END OF PRODUCT_DIM POPULATION */ /* PRODUCT_COUNT_FACT POPULATION */ TRUNCATE product_count_fact ; INSERT INTO product_count_fact (product_sk, product_launch_date_sk) SELECT a.product_sk , b.date_sk FROM product_dim a , date_dim b WHERE a.effective_date = b.date GROUP BY product_code HAVING COUNT(product_code) = 1 ; /* for products that have been updated by SCD2 */ INSERT INTO product_count_fact (product_sk, product_launch_date_sk) SELECT a.product_sk , b.date_sk FROM product_dim a , date_dim b WHERE a.effective_date = b.date GROUP BY product_code HAVING COUNT(product_code) > 1 ; /* END OF PRODUCT_COUNT_FACT POPULATION */ /* INSERT NEW ORDERS */ INSERT INTO sales_order_fact SELECT b.customer_sk , h.customer_zip_code_sk , i.shipping_zip_code_sk , c.product_sk , f.sales_order_attribute_sk , d.order_date_sk , NULL , NULL , NULL , NULL , g.entry_date_sk , a.order_number , e.request_delivery_date_sk , order_amount , quantity , NULL , NULL , NULL , NULL FROM source.sales_order a , customer_dim b , product_dim c , order_date_dim d , request_delivery_date_dim e , sales_order_attribute_dim f , entry_date_dim g , customer_zip_code_dim h , shipping_zip_code_dim i , customer_stg j WHERE order_status = 'N' AND a.entry_date = CURRENT_DATE AND a.customer_number = b.customer_number AND a.status_date >= b.effective_date AND a.status_date <= b.expiry_date AND a.customer_number = j.customer_number AND j.customer_zip_code = h.customer_zip_code AND a.status_date >= h.effective_date AND a.status_date <= h.expiry_date AND j.shipping_zip_code = i.shipping_zip_code AND a.status_date >= i.effective_date AND a.status_date <= i.expiry_date AND a.product_code = c.product_code AND a.status_date >= c.effective_date AND a.status_date <= c.expiry_date AND a.status_date = d.order_date AND a.entry_date = g.entry_date AND a.request_delivery_date = e.request_delivery_date AND a.verification_ind = f.verification_ind AND a.credit_check_flag = f.credit_check_flag AND a.new_customer_ind = f.new_customer_ind AND a.web_order_flag = f.web_order_flag AND a.status_date >= f.effective_date AND a.status_date <= f.expiry_date ; /* RE-BUILD PA CUSTOMER DIMENSION */ TRUNCATE pa_customer_dim; INSERT INTO pa_customer_dim SELECT DISTINCT a.* FROM customer_dim a , sales_order_fact b , customer_zip_code_dim c WHERE c.customer_state = 'PA' AND b.customer_zip_code_sk = c.customer_zip_code_sk AND a.customer_sk = b.customer_sk ; UPDATE sales_order_fact a , source.sales_order b , allocate_date_dim c SET a.allocate_date_sk = c.allocate_date_sk , a.allocate_quantity = b.quantity WHERE order_status = 'A' AND b.entry_date = CURRENT_DATE AND b.order_number = a.order_number AND c.allocate_date = b.status_date ; UPDATE sales_order_fact a , source.sales_order b , packing_date_dim d SET a.packing_date_sk = d.packing_date_sk , a.packing_quantity = b.quantity WHERE order_status = 'P' AND b.entry_date = CURRENT_DATE AND b.order_number = a.order_number AND d.packing_date = b.status_date ; UPDATE sales_order_fact a, source.sales_order b, ship_date_dim e SET a.ship_date_sk = e.ship_date_sk , a.ship_quantity = b.quantity WHERE order_status = 'S' AND b.entry_date = CURRENT_DATE AND b.order_number = a.order_number AND e.ship_date = b.status_date ; UPDATE sales_order_fact a , source.sales_order b , receive_date_dim f SET a.receive_date_sk = f.receive_date_sk , a.receive_quantity = b.quantity WHERE order_status = 'R' AND b.entry_date = CURRENT_DATE AND b.order_number = a.order_number AND f.receive_date = b.status_date ; /* end of script */ |
3.137.217.95