Revising the DW Regular Population Script

Since the schema has changed, you need to revise the regular population script as well. You need the script to also populate the product_count_fact table right after the product_dim table gets populated. Listing 23.5 shows the revised regular population script.

Listing 21.5. The revised daily DW regular population script
/******************************************************************/
/*                                                                */
/* dw_regular_21.sql                                              */
/*                                                                */
/******************************************************************/

USE dw;

/* CUSTOMER_DIM POPULATION                                        */

TRUNCATE customer_stg;

LOAD DATA INFILE 'customer.csv'
INTO TABLE customer_stg
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '
'
IGNORE 1 LINES
( customer_number
, customer_name
, customer_street_address
, customer_zip_code
, customer_city
, customer_state
, shipping_address
, shipping_zip_code
, shipping_city
, shipping_state )
;

/* SCD 2 ON ADDRESSES                                             */

UPDATE
  customer_dim a
, customer_stg b
SET
  a.expiry_date = SUBDATE(CURRENT_DATE, 1)
WHERE
  a.customer_number = b.customer_number
AND (   a.customer_street_address <> b.customer_street_address
     OR a.customer_city <> b.customer_city
     OR a.customer_zip_code <> b.customer_zip_code
     OR a.customer_state <> b.customer_state
     OR a.shipping_address <> b.shipping_address
     OR a.shipping_city <> b.shipping_city
     OR a.shipping_zip_code <> b.shipping_zip_code
     OR a.shipping_state <> b.shipping_state
     OR a.shipping_address IS NULL
     OR a.shipping_city IS NULL
     OR a.shipping_zip_code IS NULL
     OR a.shipping_state IS NULL)
AND expiry_date = '9999-12-31'
;

INSERT INTO customer_dim
SELECT
  NULL
, b.customer_number
, b.customer_name
, b.customer_street_address
, b.customer_zip_code
, b.customer_city
, b.customer_state
, b.shipping_address
, b.shipping_zip_code
, b.shipping_city
, b.shipping_state
, CURRENT_DATE
, '9999-12-31'
FROM
  customer_dim a
, customer_stg b
WHERE
    a.customer_number = b.customer_number
AND (   a.customer_street_address <> b.customer_street_address
     OR a.customer_city <> b.customer_city
     OR a.customer_zip_code <> b.customer_zip_code
     OR a.customer_state <> b.customer_state
     OR a.shipping_address <> b.shipping_address
     OR a.shipping_city <> b.shipping_city
     OR a.shipping_zip_code <> b.shipping_zip_code
     OR a.shipping_state <> b.shipping_state
     OR a.shipping_address IS NULL
     OR a.shipping_city IS NULL
     OR a.shipping_zip_code IS NULL
     OR a.shipping_state IS NULL)
AND EXISTS (
SELECT *
FROM customer_dim x
WHERE b.customer_number = x.customer_number
AND a.expiry_date = SUBDATE(CURRENT_DATE, 1))
AND NOT EXISTS (
SELECT *
FROM customer_dim y
WHERE     b.customer_number = y.customer_number
      AND y.expiry_date = '9999-12-31')
;

/* END OF SCD 2                                                   */

/* SCD 1 ON NAME                                                  */

UPDATE customer_dim a, customer_stg b
SET a.customer_name = b.customer_name
WHERE     a.customer_number = b.customer_number
      AND a.expiry_date = '9999-12-31'
      AND a.customer_name <> b.customer_name
;

/* ADD NEW CUSTOMER                                               */

INSERT INTO customer_dim
SELECT
  NULL
, customer_number
, customer_name
, customer_street_address
, customer_zip_code
, customer_city
, customer_state
, shipping_address
, shipping_zip_code
, shipping_city
, shipping_state
, CURRENT_DATE
, '9999-12-31'
FROM customer_stg
WHERE customer_number NOT IN(
SELECT a.customer_number
FROM
  customer_dim a
, customer_stg b
WHERE b.customer_number = a.customer_number )
;

/* RE-BUILD PA CUSTOMER DIMENSION                                 */

TRUNCATE pa_customer_dim;

INSERT INTO pa_customer_dim
SELECT
  customer_sk
, customer_number
, customer_name
, customer_street_address
, customer_zip_code
, customer_city
, customer_state
, shipping_address
, shipping_zip_code
, shipping_city
, shipping_state
, effective_date
, expiry_date
FROM customer_dim
WHERE customer_state = 'PA'
;

/* END OF CUSTOMER_DIM POPULATION                                 */

/* PRODUCT_DIM POPULATION                                         */

TRUNCATE product_stg
;

LOAD DATA INFILE 'product.txt'
INTO TABLE product_stg
FIELDS TERMINATED BY ''
OPTIONALLY ENCLOSED BY ''
LINES TERMINATED BY '
'
IGNORE 1 LINES
( product_code
, product_name
, product_category )
;

/* SCD2 ON PRODUCT NAME AND GROUP                                 */

UPDATE
  product_dim a
, product_stg b
SET
  expiry_date = SUBDATE(CURRENT_DATE, 1)
WHERE
    a.product_code = b.product_code
AND (   a.product_name <> b.product_name
     OR a.product_category <> b.product_category )
     AND expiry_date = '9999-12-31'
;

INSERT INTO product_dim
SELECT
  NULL
, b.product_code
, b.product_name
, b.product_category
, CURRENT_DATE
, '9999-12-31'
FROM
  product_dim a
, product_stg b
WHERE
    a.product_code = b.product_code
AND (   a.product_name <> b.product_name
     OR a.product_category <> b.product_category )
AND EXISTS (
SELECT *
FROM product_dim x
WHERE     b.product_code = x.product_code
             AND a.expiry_date = SUBDATE(CURRENT_DATE, 1))
AND NOT EXISTS (
SELECT *
FROM product_dim y
WHERE    b.product_code = y.product_code
     AND y.expiry_date = '9999-12-31')
;

/* END OF SCD 2                                                   */

/* ADD NEW PRODUCT                                                */

INSERT INTO product_dim
SELECT
  NULL
, product_code
, product_name
, product_category
, CURRENT_DATE
, '9999-12-31'
FROM product_stg
WHERE product_code NOT IN(
SELECT y.product_code
FROM product_dim x, product_stg y
WHERE x.product_code = y.product_code )
;

/* END OF PRODUCT_DIM POPULATION                                  */

/* PRODUCT_COUNT_FACT POPULATION                                  */

TRUNCATE product_count_fact
;

INSERT INTO product_count_fact
(product_sk, product_launch_date_sk)
SELECT
  a.product_sk
, b.date_sk
FROM
  product_dim a
, date_dim b
WHERE
a.effective_date = b.date
GROUP BY product_code
HAVING COUNT(product_code) = 1
;

/* for products that have been updated by SCD2                    */

INSERT INTO product_count_fact
(product_sk, product_launch_date_sk)
SELECT
  a.product_sk
, b.date_sk
FROM
  product_dim a
, date_dim b
WHERE
a.effective_date = b.date
GROUP BY product_code
HAVING COUNT(product_code) > 1
;

/* END OF PRODUCT_COUNT_FACT POPULATION                           */

/* insert new orders                                              */

INSERT INTO sales_order_fact
SELECT
  b.customer_sk
, c.product_sk
, f.sales_order_attribute_sk
, d.order_date_sk
, NULL
, NULL
, NULL
, NULL
, a.order_number
, e.request_delivery_date_sk
, order_amount
, quantity
, NULL
, NULL
, NULL
, NULL
FROM
  source.sales_order a
, customer_dim b
, product_dim c
, order_date_dim d
, request_delivery_date_dim e
, sales_order_attribute_dim f
WHERE
      order_status = 'N'
AND entry_date = CURRENT_DATE
AND a.customer_number = b.customer_number
AND a.status_date >= b.effective_date
AND a.status_date <= b.expiry_date
AND a.product_code = c.product_code
AND a.status_date >= c.effective_date
AND a.status_date <= c.expiry_date
AND a.status_date = d.order_date
AND a.request_delivery_date = e.request_delivery_date
AND a.verification_ind = f.verification_ind
AND a.credit_check_flag = f.credit_check_flag
AND a.new_customer_ind = f.new_customer_ind
AND a.web_order_flag = f.web_order_flag
AND a.status_date >= f.effective_date
AND a.status_date <= f.expiry_date
;

UPDATE sales_order_fact a, source.sales_order b, allocate_date_dim c
SET
  a.allocate_date_sk = c.allocate_date_sk
, a.allocate_quantity = b.quantity
WHERE
    order_status = 'A'
AND b.entry_date = CURRENT_DATE
AND b.order_number = a.order_number
AND c.allocate_date = b.status_date
;

UPDATE sales_order_fact a, source.sales_order b, packing_date_dim d
SET
  a.packing_date_sk = d.packing_date_sk
, a.packing_quantity = b.quantity
WHERE
    order_status = 'P'
AND b.entry_date = CURRENT_DATE
AND b.order_number = a.order_number
AND d.packing_date = b.status_date
;

UPDATE sales_order_fact a, source.sales_order b, ship_date_dim e
SET
  a.ship_date_sk = e.ship_date_sk
, a.ship_quantity = b.quantity
WHERE
    order_status = 'S'
AND b.entry_date = CURRENT_DATE
AND b.order_number = a.order_number
AND e.ship_date = b.status_date
;

UPDATE sales_order_fact a, source.sales_order b, receive_date_dim f
SET
  a.receive_date_sk = f.receive_date_sk
, a.receive_quantity = b.quantity
WHERE
    order_status = 'R'
AND b.entry_date = CURRENT_DATE
AND b.order_number = a.order_number
AND f.receive_date = b.status_date
;

/* end of script                                                  */

Before you can test the revised regular population script, you need to do two things. First, you need to prepare a product text file. Use the previous product text file, but change the name of Product code 1 to ‘Regular Hard Disk Drive’ and add a new product ‘High End Hard Disk Drive’ (product code 5), as shown here:

PRODUCT CODE,PRODUCT NAME,PRODUCT GROUP
1          Regular Hard Disk Drive       Storage
2          Floppy Drive                  Storage
3          Flat Panel                    Monitor
4          Keyboard                      Peripheral
5          High End Hard Disk Drive      Storage

Second, you need to set your MySQL date to March 20, 2007 (a date later than March 16, 2007, which is the last date you ran the regular population script in Chapter 18). Now run the dw_regular_21.sql script:

mysql> . c:mysqlscriptsdw_regular_21.sql

You’ll see the following message on your console.

Database changed
Query OK, 9 rows affected (0.10 sec)

Query OK, 9 rows affected (0.14 sec)
Records: 9  Deleted: 0  Skipped: 0  Warnings: 0

Query OK, 0 rows affected (0.00 sec)
Rows matched: 0  Changed: 0  Warnings: 0

Query OK, 0 rows affected (0.00 sec)
Records: 0  Duplicates: 0  Warnings: 0

Query OK, 0 rows affected (0.01 sec)
Rows matched: 0  Changed: 0  Warnings: 0

Query OK, 0 rows affected (0.00 sec)
Records: 0  Duplicates: 0  Warnings: 0

Query OK, 19 rows affected (0.04 sec)

Query OK, 19 rows affected (0.11 sec)
Records: 19  Duplicates: 0  Warnings: 0

Query OK, 4 rows affected (0.09 sec)

Query OK, 5 rows affected (0.06 sec)
Records: 5  Deleted: 0  Skipped: 0  Warnings: 0

Query OK, 1 row affected (0.04 sec)
Rows matched: 1  Changed: 1  Warnings: 0

Query OK, 1 row affected (0.06 sec)
Records: 1  Duplicates: 0  Warnings: 0

Query OK, 1 row affected (0.05 sec)
Records: 1  Duplicates: 0  Warnings: 0

Query OK, 2 rows affected (0.10 sec)
Records: 2  Duplicates: 0  Warnings: 0

Query OK, 1 row affected (0.08 sec)
Rows matched: 1  Changed: 1  Warnings: 0

Query OK, 0 rows affected (0.00 sec)
Records: 0  Duplicates: 0  Warnings: 0

Query OK, 0 rows affected (0.01 sec)
Rows matched: 0  Changed: 0  Warnings: 0

Query OK, 0 rows affected (0.00 sec)
Rows matched: 0  Changed: 0  Warnings: 0

Query OK, 0 rows affected (0.01 sec)
Rows matched: 0  Changed: 0  Warnings: 0

Query OK, 0 rows affected (0.01 sec)
Rows matched: 0  Changed: 0  Warnings: 0

You can confirm that the regular population is correct by querying the product_count_fact table:

mysql> SELECT c.product_sk psk, c.product_code pc,
       b.product_launch_date_sk plsk,
    -> b.product_launch_date pld
    -> FROM product_count_fact a, product_launch_date_dim b,
    -> product_dim c
    -> WHERE a.product_launch_date_sk = b.product_launch_date_sk
    -> AND a.product_sk = c.product_sk
    -> ORDER BY product_code, product_launch_date;

Here is the correct result.

+-----+------+------+------------+
| psk | pc   | plsk | pld        |
+-----+------+------+------------+
|   1 |    1 |    1 | 2005-03-01 |
|   2 |    2 |    1 | 2005-03-01 |
|   3 |    3 |    1 | 2005-03-01 |
|   5 |    4 |  731 | 2007-03-01 |
|   7 |    5 |  750 | 2007-03-20 |
+-----+------+------+------------+
5 rows in set (0.01 sec)

Note

The launch date of the new product code (March 20, 200) was added. The change on the name of Product code 1 did not impact the launch date.


..................Content has been hidden....................

You can't read the all page of ebook, please click here login for view all page.
Reset
3.138.135.80