Revising the Regular Population Script

Since you’ve modified the database structure, you will also need to revise the regular population script. There are changes in three areas:

  • You need to remove all zip code related columns in the customer dimension population, as the customer addresses and shipping zip codes are no longer in the customer dimension

  • You need to use the surrogate keys from the customer zip code and shipping zip code dimension views. (Shown in bold in the script in Listing 23.6)

  • You need to move the pa_customer_dim population after the sales_order_fact population because to get the customer zip code you need to go to the customer_zip_code_sk in sales order fact table.

The regular population script in Listing 23.6 implements these three changes.

Listing 23.6. Revised daily population
/******************************************************************/
/*                                                                */
/* dw_regular_23.sql                                              */
/*                                                                */
/******************************************************************/

USE dw;

/* CUSTOMER_DIM POPULATION                                        */

TRUNCATE customer_stg;

LOAD DATA INFILE 'customer.csv'
INTO TABLE customer_stg
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '
'
IGNORE 1 LINES
( customer_number
, customer_name
, customer_street_address
, customer_zip_code
, customer_city
, customer_state
, shipping_address
, shipping_zip_code
, shipping_city
, shipping_state )
;

/* SCD 2 ON ADDRESSES                                             */

UPDATE
  customer_dim a
, customer_stg b
SET
  a.expiry_date = SUBDATE(CURRENT_DATE, 1)
WHERE
  a.customer_number = b.customer_number
AND (   a.customer_street_address <> b.customer_street_address
     OR a.shipping_address <> b.shipping_address
     OR a.shipping_address IS NULL )
AND expiry_date = '9999-12-31'
;

INSERT INTO customer_dim
SELECT
  NULL
, b.customer_number
, b.customer_name
, b.customer_street_address
, b.shipping_address
, CURRENT_DATE
, '9999-12-31'
FROM
  customer_dim a
, customer_stg b
WHERE
    a.customer_number = b.customer_number
AND (   a.customer_street_address <> b.customer_street_address
     OR a.shipping_address <> b.shipping_address
     OR a.shipping_address IS NULL )
AND EXISTS (
SELECT *
FROM customer_dim x
WHERE b.customer_number = x.customer_number
AND a.expiry_date = SUBDATE(CURRENT_DATE, 1))
AND NOT EXISTS (
SELECT *
FROM customer_dim y
WHERE     b.customer_number = y.customer_number
      AND y.expiry_date = '9999-12-31')
;

/* END OF SCD 2                                                   */

/* SCD 1 ON NAME                                                  */

UPDATE customer_dim a, customer_stg b
SET a.customer_name = b.customer_name
WHERE     a.customer_number = b.customer_number
      AND a.expiry_date = '9999-12-31'
      AND a.customer_name <> b.customer_name
;

/* ADD NEW CUSTOMER                                               */

INSERT INTO customer_dim
SELECT
  NULL
, customer_number
, customer_name
, customer_street_address
, shipping_address
, CURRENT_DATE
, '9999-12-31'
FROM customer_stg
WHERE customer_number NOT IN(
SELECT a.customer_number
FROM
  customer_dim a
, customer_stg b
WHERE b.customer_number = a.customer_number )
;

/* END OF CUSTOMER_DIM POPULATION                                 */

/* PRODUCT_DIM POPULATION                                         */

TRUNCATE product_stg
;

LOAD DATA INFILE 'product.txt'
INTO TABLE product_stg
FIELDS TERMINATED BY ''
OPTIONALLY ENCLOSED BY ''
LINES TERMINATED BY '
'
IGNORE 1 LINES
( product_code
, product_name
, product_category )
;

/* SCD2 ON PRODUCT NAME AND GROUP                                 */

UPDATE
  product_dim a
, product_stg b
SET
  expiry_date = SUBDATE(CURRENT_DATE, 1)
WHERE
    a.product_code = b.product_code
AND (   a.product_name <> b.product_name
     OR a.product_category <> b.product_category )
     AND expiry_date = '9999-12-31'
;

INSERT INTO product_dim
SELECT
  NULL
, b.product_code
, b.product_name
, b.product_category
, CURRENT_DATE
, '9999-12-31'
FROM
  product_dim a
, product_stg b
WHERE
    a.product_code = b.product_code
AND (   a.product_name <> b.product_name
     OR a.product_category <> b.product_category )
AND EXISTS (
SELECT *
FROM product_dim x
WHERE     b.product_code = x.product_code
      AND a.expiry_date = SUBDATE(CURRENT_DATE, 1))
AND NOT EXISTS (
SELECT *
FROM product_dim y
WHERE    b.product_code = y.product_code
     AND y.expiry_date = '9999-12-31')
;

/* END OF SCD 2                                                   */

/* ADD NEW PRODUCT                                                */

INSERT INTO product_dim
SELECT
  NULL
, product_code
, product_name
, product_category
, CURRENT_DATE
, '9999-12-31'
FROM product_stg
WHERE product_code NOT IN(
SELECT y.product_code
FROM product_dim x, product_stg y
WHERE x.product_code = y.product_code )
;

/* END OF PRODUCT_DIM POPULATION                                  */

/* PRODUCT_COUNT_FACT POPULATION                                  */

TRUNCATE product_count_fact
;

INSERT INTO product_count_fact
(product_sk, product_launch_date_sk)
SELECT
  a.product_sk
, b.date_sk
FROM
  product_dim a
, date_dim b
WHERE
a.effective_date = b.date
GROUP BY product_code
HAVING COUNT(product_code) = 1
;

/* for products that have been updated by SCD2                    */

INSERT INTO product_count_fact (product_sk, product_launch_date_sk)
SELECT
  a.product_sk
, b.date_sk

FROM
  product_dim a
, date_dim b

WHERE
a.effective_date = b.date
GROUP BY product_code
HAVING COUNT(product_code) > 1
;

/* END OF PRODUCT_COUNT_FACT POPULATION                           */

/* INSERT NEW ORDERS                                              */

INSERT INTO sales_order_fact
SELECT
  b.customer_sk
, h.customer_zip_code_sk
, i.shipping_zip_code_sk
, c.product_sk
, f.sales_order_attribute_sk
, d.order_date_sk
, NULL
, NULL
, NULL
, NULL
, g.entry_date_sk
, a.order_number
, e.request_delivery_date_sk
, order_amount
, quantity
, NULL
, NULL
, NULL
, NULL
FROM
  source.sales_order a
, customer_dim b
, product_dim c
, order_date_dim d
, request_delivery_date_dim e
, sales_order_attribute_dim f
, entry_date_dim g
, customer_zip_code_dim h
, shipping_zip_code_dim i
, customer_stg j
WHERE
      order_status = 'N'
AND a.entry_date = CURRENT_DATE
AND a.customer_number = b.customer_number
AND a.status_date >= b.effective_date
AND a.status_date <= b.expiry_date
AND a.customer_number = j.customer_number
AND j.customer_zip_code = h.customer_zip_code
AND a.status_date >= h.effective_date
AND a.status_date <= h.expiry_date
AND j.shipping_zip_code = i.shipping_zip_code
AND a.status_date >= i.effective_date
AND a.status_date <= i.expiry_date
AND a.product_code = c.product_code
AND a.status_date >= c.effective_date
AND a.status_date <= c.expiry_date
AND a.status_date = d.order_date
AND a.entry_date = g.entry_date
AND a.request_delivery_date = e.request_delivery_date
AND a.verification_ind = f.verification_ind
AND a.credit_check_flag = f.credit_check_flag
AND a.new_customer_ind = f.new_customer_ind
AND a.web_order_flag = f.web_order_flag
AND a.status_date >= f.effective_date
AND a.status_date <= f.expiry_date
;

/* RE-BUILD PA CUSTOMER DIMENSION                                 */

TRUNCATE pa_customer_dim;

INSERT INTO pa_customer_dim
SELECT DISTINCT a.*
FROM
  customer_dim a
, sales_order_fact b
, customer_zip_code_dim c
WHERE
    c.customer_state = 'PA'
AND b.customer_zip_code_sk = c.customer_zip_code_sk
AND a.customer_sk = b.customer_sk
;

UPDATE
  sales_order_fact a
, source.sales_order b
, allocate_date_dim c
SET
  a.allocate_date_sk = c.allocate_date_sk
, a.allocate_quantity = b.quantity
WHERE
    order_status = 'A'
AND b.entry_date = CURRENT_DATE
AND b.order_number = a.order_number
AND c.allocate_date = b.status_date
;

UPDATE
  sales_order_fact a
, source.sales_order b
, packing_date_dim d
SET
  a.packing_date_sk = d.packing_date_sk
, a.packing_quantity = b.quantity
WHERE
    order_status = 'P'
AND b.entry_date = CURRENT_DATE
AND b.order_number = a.order_number
AND d.packing_date = b.status_date
;

UPDATE sales_order_fact a, source.sales_order b, ship_date_dim e
SET
  a.ship_date_sk = e.ship_date_sk
, a.ship_quantity = b.quantity
WHERE
    order_status = 'S'
AND b.entry_date = CURRENT_DATE
AND b.order_number = a.order_number
AND e.ship_date = b.status_date
;

UPDATE
  sales_order_fact a
, source.sales_order b
, receive_date_dim f
SET
  a.receive_date_sk = f.receive_date_sk
, a.receive_quantity = b.quantity
WHERE
    order_status = 'R'
AND b.entry_date = CURRENT_DATE
AND b.order_number = a.order_number
AND f.receive_date = b.status_date
;

/* end of script                                                  */

..................Content has been hidden....................

You can't read the all page of ebook, please click here login for view all page.
Reset
18.188.218.226