Since you now have a new dimension, you have to update the regular population script. Listing 18.3 shows the revised script.
/******************************************************************/ /* */ /* dw_regular_18.sql */ /* */ /******************************************************************/ USE dw; /* CUSTOMER_DIM POPULATION */ TRUNCATE customer_stg; LOAD DATA INFILE 'customer.csv' INTO TABLE customer_stg FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY ' ' IGNORE 1 LINES ( customer_number , customer_name , customer_street_address , customer_zip_code , customer_city , customer_state , shipping_address , shipping_zip_code , shipping_city , shipping_state ) ; /* SCD 2 ON ADDRESSES */ UPDATE customer_dim a , customer_stg b SET a.expiry_date = SUBDATE(CURRENT_DATE, 1) WHERE a.customer_number = b.customer_number AND ( a.customer_street_address <> b.customer_street_address OR a.customer_city <> b.customer_city OR a.customer_zip_code <> b.customer_zip_code OR a.customer_state <> b.customer_state OR a.shipping_address <> b.shipping_address OR a.shipping_city <> b.shipping_city OR a.shipping_zip_code <> b.shipping_zip_code OR a.shipping_state <> b.shipping_state OR a.shipping_address IS NULL OR a.shipping_city IS NULL OR a.shipping_zip_code IS NULL OR a.shipping_state IS NULL) AND expiry_date = '9999-12-31' ; INSERT INTO customer_dim SELECT NULL , b.customer_number , b.customer_name , b.customer_street_address , b.customer_zip_code , b.customer_city , b.customer_state , b.shipping_address , b.shipping_zip_code , b.shipping_city , b.shipping_state , CURRENT_DATE , '9999-12-31' FROM customer_dim a , customer_stg b WHERE a.customer_number = b.customer_number AND ( a.customer_street_address <> b.customer_street_address OR a.customer_city <> b.customer_city OR a.customer_zip_code <> b.customer_zip_code OR a.customer_state <> b.customer_state OR a.shipping_address <> b.shipping_address OR a.shipping_city <> b.shipping_city OR a.shipping_zip_code <> b.shipping_zip_code OR a.shipping_state <> b.shipping_state OR a.shipping_address IS NULL OR a.shipping_city IS NULL OR a.shipping_zip_code IS NULL OR a.shipping_state IS NULL) AND EXISTS ( SELECT * FROM customer_dim x WHERE b.customer_number = x.customer_number AND a.expiry_date = SUBDATE(CURRENT_DATE, 1)) AND NOT EXISTS ( SELECT * FROM customer_dim y WHERE b.customer_number = y.customer_number AND y.expiry_date = '9999-12-31') ; /* END OF SCD 2 */ /* SCD 1 ON NAME */ UPDATE customer_dim a , customer_stg b SET a.customer_name = b.customer_name WHERE a.customer_number = b.customer_number AND a.expiry_date = '9999-12-31' AND a.customer_name <> b.customer_name ; /* ADD NEW CUSTOMER */ INSERT INTO customer_dim SELECT NULL , customer_number , customer_name , customer_street_address , customer_zip_code , customer_city , customer_state , shipping_address , shipping_zip_code , shipping_city , shipping_state , CURRENT_DATE , '9999-12-31' FROM customer_stg WHERE customer_number NOT IN( SELECT a.customer_number FROM customer_dim a , customer_stg b WHERE b.customer_number = a.customer_number ) ; /* RE-BUILD PA CUSTOMER DIMENSION */ TRUNCATE pa_customer_dim; INSERT INTO pa_customer_dim SELECT customer_sk , customer_number , customer_name , customer_street_address , customer_zip_code , customer_city , customer_state , shipping_address , shipping_zip_code , shipping_city , shipping_state , effective_date , expiry_date FROM customer_dim WHERE customer_state = 'PA' ; /* END OF CUSTOMER_DIM POPULATION */ /* PRODUCT_DIM POPULATION */ TRUNCATE product_stg ; LOAD DATA INFILE 'product.txt' INTO TABLE product_stg FIELDS TERMINATED BY '' OPTIONALLY ENCLOSED BY '' LINES TERMINATED BY ' ' IGNORE 1 LINES ( product_code , product_name , product_category ) ; /* SCD2 ON PRODUCT NAME AND GROUP */ UPDATE product_dim a , product_stg b SET expiry_date = SUBDATE(CURRENT_DATE, 1) WHERE a.product_code = b.product_code AND ( a.product_name <> b.product_name OR a.product_category <> b.product_category ) AND expiry_date = '9999-12-31' ; INSERT INTO product_dim SELECT NULL , b.product_code , b.product_name , b.product_category , CURRENT_DATE , '9999-12-31' FROM product_dim a , product_stg b WHERE a.product_code = b.product_code AND ( a.product_name <> b.product_name OR a.product_category <> b.product_category ) AND EXISTS ( SELECT * FROM product_dim x WHERE b.product_code = x.product_code AND a.expiry_date = SUBDATE(CURRENT_DATE, 1)) AND NOT EXISTS ( SELECT * FROM product_dim y WHERE b.product_code = y.product_code AND y.expiry_date = '9999-12-31') ; /* END OF SCD 2 */ /* ADD NEW PRODUCT */ INSERT INTO product_dim SELECT NULL , product_code , product_name , product_category , CURRENT_DATE , '9999-12-31' FROM product_stg WHERE product_code NOT IN( SELECT y.product_code FROM product_dim x, product_stg y WHERE x.product_code = y.product_code ) ; /* END OF PRODUCT_DIM POPULATION */ /* insert new orders */ INSERT INTO sales_order_fact SELECT b.customer_sk , c.product_sk , f.sales_order_attribute_sk , d.order_date_sk , NULL , NULL , NULL , NULL , a.order_number , e.request_delivery_date_sk , order_amount , quantity , NULL , NULL , NULL , NULL FROM source.sales_order a , customer_dim b , product_dim c , order_date_dim d , request_delivery_date_dim e , sales_order_attribute_dim f WHERE order_status = 'N' AND entry_date = CURRENT_DATE AND a.customer_number = b.customer_number AND a.status_date >= b.effective_date AND a.status_date <= b.expiry_date AND a.product_code = c.product_code AND a.status_date >= c.effective_date AND a.status_date <= c.expiry_date AND a.status_date = d.order_date AND a.request_delivery_date = e.request_delivery_date AND a.verification_ind = f.verification_ind AND a.credit_check_flag = f.credit_check_flag AND a.new_customer_ind = f.new_customer_ind AND a.web_order_flag = f.web_order_flag AND a.status_date >= f.effective_date AND a.status_date <= f.expiry_date ; UPDATE sales_order_fact a , source.sales_order b , allocate_date_dim c SET a.allocate_date_sk = c.allocate_date_sk , a.allocate_quantity = b.quantity WHERE order_status = 'A' AND b.entry_date = CURRENT_DATE AND b.order_number = a.order_number AND c.allocate_date = b.status_date ; UPDATE sales_order_fact a , source.sales_order b , packing_date_dim d SET a.packing_date_sk = d.packing_date_sk , a.packing_quantity = b.quantity WHERE order_status = 'P' AND b.entry_date = CURRENT_DATE AND b.order_number = a.order_number AND d.packing_date = b.status_date ; UPDATE sales_order_fact a , source.sales_order b , ship_date_dim e SET a.ship_date_sk = e.ship_date_sk , a.ship_quantity = b.quantity WHERE order_status = 'S' AND b.entry_date = CURRENT_DATE AND b.order_number = a.order_number AND e.ship_date = b.status_date ; UPDATE sales_order_fact a , source.sales_order b , receive_date_dim f SET a.receive_date_sk = f.receive_date_sk , a.receive_quantity = b.quantity WHERE order_status = 'R' AND b.entry_date = CURRENT_DATE AND b.order_number = a.order_number AND f.receive_date = b.status_date ; /* end of script */ |
Before you can run the revised script, you need to change the sales_order source data by adding four attributes columns to it using the sales_order_attributes.sql script in Listing 18.4.
/******************************************************************/ /* */ /* sales_order_attributes.sql */ /* */ /******************************************************************/ USE source; ALTER TABLE sales_order ADD verification_ind CHAR(1) AFTER product_code , ADD credit_check_flag CHAR(1) AFTER verification_ind , ADD new_customer_ind CHAR(1) AFTER credit_check_flag , ADD web_order_flag CHAR(1) AFTER new_customer_ind ; /* end of script */ |
Run the script using this command.
mysql> . c:mysqlscriptssales_order_attributes.sql
You can see the response on your console.
Database changed Query OK, 67 rows affected (0.38 sec) Records: 67 Duplicates: 0 Warnings: 0
Now, add more sales orders. You can use the script in Listing 18.5 to add eight orders.
/******************************************************************/ /* */ /* sales_order_18.sql */ /* */ /******************************************************************/ USE source; INSERT INTO sales_order VALUES (54, 1, 1, 'Y', 'Y', 'N', 'Y', '2007-03-16', 'N', '2007-03-20', '2007-03-16', 7500, 75) , (55, 2, 2, 'N', 'N', 'N', 'N', '2007-03-16', 'N', '2007-03-20', '2007-03-16', 1000, 10) , (56, 3, 3, 'Y', 'Y', 'N', 'N', '2007-03-16', 'N', '2007-03-20', '2007-03-16', 7500, 75) , (57, 4, 4, 'Y', 'N', 'N', 'N', '2007-03-16', 'N', '2007-03-20', '2007-03-16', 1000, 10) , (58, 11, 1, 'N', 'Y', 'Y', 'Y', '2007-03-16', 'N', '2007-03-20', '2007-03-16', 7500, 75) , (59, 12, 2, 'N', 'Y', 'Y', 'N', '2007-03-16', 'N', '2007-03-20', '2007-03-16', 1000, 10) , (60, 13, 3, 'Y', 'Y', 'Y', 'N', '2007-03-16', 'N', '2007-03-20', '2007-03-16', 7500, 75) , (61, 14, 4, 'Y', 'N', 'Y', 'N', '2007-03-16', 'N', '2007-03-20', '2007-03-16', 1000, 10) ; /* end of script */ |
Run the script using this command.
mysql> . c:mysqlscriptssales_order_18.sql
The response should be as follows.
Database changed Query OK, 8 rows affected (0.05 sec) Records: 8 Duplicates: 0 Warnings: 0
To confirm that eight sales orders were loaded correctly, query the sales_order table in the source database.
mysql> select order_number, verification_ind vi, credit_check_flag ccf, -> new_customer_ind nci, web_order_flag wof -> from sales_order -> where order_number between 54 and 61;
Here is the content of the sales_order table.
+--------------+------+------+------+------+ | order_number | vi | ccf | nci | wof | +--------------+------+------+------+------+ | 54 | Y | Y | N | Y | | 55 | N | N | N | N | | 56 | Y | Y | N | N | | 57 | Y | N | N | N | | 58 | N | Y | Y | Y | | 59 | N | Y | Y | N | | 60 | Y | Y | Y | N | | 61 | Y | N | Y | N | +--------------+------+------+------+------+ 8 rows in set (0.43 sec)
Now, you’re ready to run the revised regular loading script. Set your MySQL date to March 16, 2007 (the order date) and run the dw_regular_18.sql script.
mysql> . c:mysqlscriptsdw_regular_18.sql
You should see the following on your MySQL console.
Database changed Query OK, 9 rows affected (0.06 sec) Query OK, 9 rows affected (0.07 sec) Records: 9 Deleted: 0 Skipped: 0 Warnings: 0 Query OK, 0 rows affected (0.00 sec) Rows matched: 0 Changed: 0 Warnings: 0 Query OK, 0 rows affected (0.00 sec) Records: 0 Duplicates: 0 Warnings: 0 Query OK, 0 rows affected (0.00 sec) Rows matched: 0 Changed: 0 Warnings: 0 Query OK, 0 rows affected (0.00 sec) Records: 0 Duplicates: 0 Warnings: 0 Query OK, 19 rows affected (0.07 sec) Query OK, 19 rows affected (0.06 sec) Records: 19 Duplicates: 0 Warnings: 0 Query OK, 4 rows affected (0.07 sec) Query OK, 4 rows affected (0.06 sec) Records: 4 Deleted: 0 Skipped: 0 Warnings: 0 Query OK, 0 rows affected (0.00 sec) Rows matched: 0 Changed: 0 Warnings: 0 Query OK, 0 rows affected (0.00 sec) Records: 0 Duplicates: 0 Warnings: 0 Query OK, 0 rows affected (0.00 sec) Records: 0 Duplicates: 0 Warnings: 0 Query OK, 8 rows affected (0.15 sec) Records: 8 Duplicates: 0 Warnings: 0 Query OK, 0 rows affected (0.01 sec) Rows matched: 0 Changed: 0 Warnings: 0 Query OK, 0 rows affected (0.01 sec) Rows matched: 0 Changed: 0 Warnings: 0 Query OK, 0 rows affected (0.01 sec) Rows matched: 0 Changed: 0 Warnings: 0 Query OK, 0 rows affected (0.00 sec) Rows matched: 0 Changed: 0 Warnings: 0
You can use the analytical query in Listing 18.6 to confirm correct loading. The query analyzes how many sales orders from new customers were checked for credit statuses.
/******************************************************************/ /* */ /* new_customer_credit_check.sql */ /* */ /******************************************************************/ USE dw; SELECT CONCAT( ROUND( checked / ( checked + not_checked )*100 ), ' %' ) FROM ( SELECT COUNT(*) checked FROM sales_order_fact a, sales_order_attribute_dim b WHERE new_customer_ind = 'Y' and credit_check_flag = 'Y' AND a.sales_order_attribute_sk = b.sales_order_attribute_sk) x , (SELECT COUNT(*) not_checked FROM sales_order_fact a, sales_order_attribute_dim b WHERE new_customer_ind = 'Y' and credit_check_flag = 'N' AND a.sales_order_attribute_sk = b.sales_order_attribute_sk) y; /* end of script */ |
Run the query using this command.
mysql> . c:mysqlscripts ew_customer_credit_check.sql
You should get the following output.
Database changed +------------------------------------------------------------------+ | CONCAT( ROUND( checked / ( checked + not_checked )*100 ), ' %' ) | +------------------------------------------------------------------+ | 75 % | +------------------------------------------------------------------+ 1 row in set (0.01 sec)
3.22.216.254