数据仓库开发之路之三--时间维度的创建

879 查看

在数据仓库中,无一例外地需要和时间维度打交道,因此设计合理的时间维度,也是一个数据仓库项目开始必备的资源储备,如果有这方面的积累,就不用到处寻找合适的设计模型以及存储过程的代码了,否则可能需要花费一定的时间去寻找符合该项目合适的存储过程,或者自己动手编写。
一般来说,时间维度的创建要根据实际的数据仓库应用来,基本上可以分为天、月的时间维度表,更细的可以分为半小时时间段,小时时间段等等,一般数据量大的时间维度都是利用存储过程来生成的。
下面介绍一些时间维度表的设计结构。

<1> 时间维度表1(两列都是字段)


<1> 时间维度表2
a.日期维度


b.半小时时间维度




c.时间段维度



3、比较完整的时间维度设计





构建时间维度的存储过程包体如下所示

create or replace package body PKG_LOAD_DIM_TIME
as
  /************************************
  ** 是否合理自然日
  ** 如果是合理自然日则返回1,否则返回0
  ************************************/
  function F_Is_Day_ID
  (
    p_TIME_ID DIW.DW_DIM_TIME.DAY_CODE%type
  ) return number
  is
    v_Date date;
  begin
    v_Date := to_date(p_TIME_ID, 'YYYYMMDD');
    return 1;
  exception
    when others then
      return 0;
  end F_Is_Day_ID;

  /************************************
  ** 加载时间维度数据
  ************************************/
  procedure P_BUILD_DIM_TIME
  (
    p_START_DATE DIW.DW_DIM_TIME.DAY_CODE%type,  --开始日期
    p_END_DATE DIW.DW_DIM_TIME.DAY_CODE%type     --结束日期
  )
  is
    v_START_DATE date;  --开始日期
    v_END_DATE date;    --结束日期
    v_DATE date;

    v_YEAR varchar2(4);
    v_QUARTER varchar2(1);
    v_MONTH varchar2(2);
    v_TEN_DAY varchar2(1);
    v_WEEK varchar2(2);
    v_WEEK_YEAR varchar2(4);
    v_DAY varchar2(2);

  begin
    --BICODE.PKG_ETL_CONTROLLER.P_ETL_WRITE_LOG('一般','BICODE.PKG_LOAD_DIM_TIME.P_BUILD_DIM_TIME执行开始...');
    execute immediate 'TRUNCATE TABLE DIW.ODS_DIM_TIME';

    if F_Is_Day_ID(p_START_DATE)=1 and F_Is_Day_ID(p_END_DATE)=1 then

        v_START_DATE := TO_DATE(p_START_DATE,'YYYYMMDD');
        v_END_DATE := TO_DATE(p_END_DATE,'YYYYMMDD');

        if v_END_DATE - v_START_DATE >= 0 then
          --开始生成
          for i in 0..v_END_DATE - v_START_DATE loop

            v_DATE := v_START_DATE+i;
            v_YEAR := TO_CHAR(v_DATE,'YYYY');
            v_QUARTER := TO_CHAR(v_DATE,'Q');
            v_MONTH := TO_CHAR(v_DATE,'MM');
            v_TEN_DAY := case
                           when TO_CHAR(v_DATE,'DD')<'11' then '1'
                           when TO_CHAR(v_DATE,'DD')<'21' then '2'
                           when TO_CHAR(v_DATE,'DD')<'32' then '3'
                         end;
            --周方案一:星期被年分开,实际是7天分段不是自然周
            --v_WEEK := TO_CHAR(v_DATE,'WW');
            --v_WEEK_YEAR := v_YEAR;

            --周方案二:本星期星期四所在年
            --v_WEEK := TO_CHAR(v_DATE,'IW');
            --select TO_CHAR(NEXT_DAY(v_DATE-7,2)+3,'YYYY') into v_WEEK_YEAR from dual;
            ----v_WEEK_YEAR := TO_CHAR(NEXT_DAY(v_DATE-7,2)+3,'YYYY');--奇怪的错误??

            --周方案三:自然周,周被年分,没找到函数支持。
            select
              LPAD(TO_CHAR(TRUNC((v_DATE
              -to_date(v_YEAR||'0101','yyyymmdd')
              +DECODE(TO_NUMBER(to_CHAR(to_date(v_YEAR||'0101','yyyymmdd'),'D')),1,7,
              TO_NUMBER(to_CHAR(to_date(v_YEAR||'0101','yyyymmdd'),'D'))-1)-1)/7+1)),2,'0')
              INTO v_WEEK
            from dual;
            v_WEEK_YEAR := v_YEAR;

            v_DAY := TO_CHAR(v_DATE,'DD');

            insert into DIW.DW_DIM_TIME
            (
              DAY_CODE,
              DAY_LONG_DESC,
              DAY_MEDIUM_DESC,
              DAY_SHORT_DESC,
              WEEK_CODE,
              WEEK_LONG_DESC,
              WEEK_MEDIUM_DESC,
              WEEK_SHORT_DESC,
              TEN_DAY_CODE,
              TEN_DAY_LONG_DESC,
              TEN_DAY_MEDIUM_DESC,
              TEN_DAY_SHORT_DESC,
              MONTH_CODE,
              MONTH_LONG_DESC,
              MONTH_MEDIUM_DESC,
              MONTH_SHORT_DESC,
              QUARTER_CODE,
              QUARTER_LONG_DESC,
              QUARTER_MEDIUM_DESC,
              QUARTER_SHORT_DESC,
              HALF_YEAR_CODE,
              HALF_LONG_DESC,
              HALF_MEDIUM_DESC,
              HALF_SHORT_DESC,
              YEAR_CODE,
              YEAR_LONG_DESC,
              YEAR_MEDIUM_DESC,
              YEAR_SHORT_DESC,
              ALL_TIME_CODE,
              ALL_TIME_DESC,
              DAY_TIMESPAN,
              DAY_END_DATE,
              WEEK_TIMESPAN,
              WEEK_END_DATE,
              TEN_DAY_TIMESPAN,
              TEN_DAY_END_DATE,
              MONTH_TIMESPAN,
              MONTH_END_DATE,
              QUARTER_TIMESPAN,
              QUARTER_END_DATE,
              HALF_YEAR_TIMESPAN,
              HALF_YEAR_END_DATE,
              YEAR_TIMESPAN,
              YEAR_END_DATE
            )
            values
            (
              v_YEAR||v_MONTH||v_DAY,
              v_YEAR||'年'||v_MONTH||'月'||v_DAY||'日',
              /*v_MONTH||'月'||*/v_DAY||'日',
              v_YEAR||'-'||v_MONTH||'-'||v_DAY,
              v_WEEK_YEAR||'W'||v_WEEK,
              v_WEEK_YEAR||'年第'||v_WEEK||'周',
              '第'||v_WEEK||'周',
              v_WEEK_YEAR||'-W'||v_WEEK,
              v_YEAR||v_MONTH||'X'||v_TEN_DAY,
              v_YEAR||'年'||v_MONTH||'月'||decode(v_TEN_DAY,'1','上','2','中','下')||'旬',
              decode(v_TEN_DAY,'1','上','2','中','下')||'旬',
              v_YEAR||'-'||v_MONTH||'-X'||v_TEN_DAY,
              v_YEAR||v_MONTH,
              v_YEAR||'年'||v_MONTH||'月',
              v_MONTH||'月',
              v_YEAR||'-'||v_MONTH,
              v_YEAR||'Q'||v_QUARTER,
              v_YEAR||'年第'||v_QUARTER||'季度',
              '第'||v_QUARTER||'季度',
              v_YEAR||'-'||'Q'||v_QUARTER,
              v_YEAR||'H'||decode(v_QUARTER,'1','1','2','1','2'),
              v_YEAR||'年'||decode(v_QUARTER,'1','上','2','上','下')||'半年',
              decode(v_QUARTER,'1','上','2','上','下')||'半年',
              v_YEAR||'-'||'H'||decode(v_QUARTER,'1','1','2','1','2'),
              v_YEAR,
              v_YEAR||'年',
              v_YEAR||'年',
              v_YEAR,
              'ALL',
              'ALL_TIME',
              1,
              v_YEAR||v_MONTH||v_DAY,
              case
                when
                  TO_CHAR(NEXT_DAY(v_DATE-7,2),'YYYY')<v_WEEK_YEAR
                then
                  NEXT_DAY(v_DATE,2)-TO_DATE(v_WEEK_YEAR||'0101','YYYYMMDD')
                when
                  TO_CHAR(NEXT_DAY(v_DATE,2),'YYYY')>v_WEEK_YEAR
                then
                  TO_DATE(v_WEEK_YEAR||'1231','YYYYMMDD')-NEXT_DAY(v_DATE-7,2)+1
                else 7
              end,
              case
                when
                  TO_CHAR(NEXT_DAY(v_DATE,2),'YYYY')>v_WEEK_YEAR
                then v_WEEK_YEAR||'1231'
                else TO_CHAR(NEXT_DAY(v_DATE,2)-1,'YYYYMMDD')
              end,
              decode(v_TEN_DAY,'3',LAST_DAY(v_DATE)-TO_DATE(v_YEAR||v_MONTH||'21','YYYYMMDD')+1,10),
              decode(v_TEN_DAY,'3',TO_CHAR(LAST_DAY(v_DATE),'YYYYMMDD'),'2',v_YEAR||v_MONTH||'20',v_YEAR||v_MONTH||'10'),
              TO_NUMBER(TO_CHAR(LAST_DAY(v_DATE),'DD')),
              TO_CHAR(LAST_DAY(v_DATE),'YYYYMMDD'),
              decode(v_QUARTER,'1',TO_DATE(v_YEAR||'0331','YYYYMMDD')-TO_DATE(v_YEAR||'0101','YYYYMMDD')+1,'2',91,92),
              decode(v_QUARTER,'1',v_YEAR||'0331','2',v_YEAR||'0630','3',v_YEAR||'0930',v_YEAR||'1231'),
              decode(v_QUARTER,'3',184,'4',184,TO_DATE(v_YEAR||'0630','YYYYMMDD')-TO_DATE(v_YEAR||'0101','YYYYMMDD')+1),
              decode(v_QUARTER,'1',v_YEAR||'0630','2',v_YEAR||'0630',v_YEAR||'1231'),
              TO_DATE(v_YEAR||'1231','YYYYMMDD')-TO_DATE(v_YEAR||'0101','YYYYMMDD')+1,
              v_YEAR||'1231'
            )
            ;
          end loop;
          commit;
        end if;
    end if;

    --BICODE.PKG_ETL_CONTROLLER.P_UPDATE_ETL_TIME_INFO('DW','DIM_TIME');
    --BICODE.PKG_ETL_CONTROLLER.P_ETL_WRITE_LOG('一般','BICODE.PKG_LOAD_DIM_TIME.P_BUILD_DIM_TIME执行结束...');

    exception
      when others then
        rollback;
        --BICODE.PKG_ETL_CONTROLLER.P_ETL_WRITE_LOG('异常','BICODE.PKG_LOAD_DIM_TIME.P_BUILD_DIM_TIME执行失败...');    

  end P_BUILD_DIM_TIME;
end PKG_LOAD_DIM_TIME;

有时候需要精确度到月份的时间维度表,由于上面的日期时间维度表相对信息比较丰富,可以建立一个视图来影射一个月度时间维度表,如下图所示:



具体的视图Sql如下所示:

CREATE OR REPLACE VIEW DW_DIM_TIME_MONTH_V AS
SELECT DISTINCT
  T.MONTH_CODE,
  T.MONTH_LONG_DESC,
  T.MONTH_MEDIUM_DESC,
  T.MONTH_SHORT_DESC,
  T.QUARTER_CODE,
  T.QUARTER_LONG_DESC,
  T.QUARTER_MEDIUM_DESC,
  T.QUARTER_SHORT_DESC,
  T.HALF_YEAR_CODE,
  T.HALF_LONG_DESC,
  T.HALF_MEDIUM_DESC,
  T.HALF_SHORT_DESC,
  T.YEAR_CODE,
  T.YEAR_LONG_DESC,
  T.YEAR_MEDIUM_DESC,
  T.YEAR_SHORT_DESC
FROM DIW.DW_DIM_TIME T;