Data Market

销售主题统计宽表

手撕代码Pseudocode

Tips:Please follow these steps before you start your project !

Code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
-- 基于DWS层销售主题日统计宽表的值,上卷出年月周日的销售主题统计宽表(年月周日)
-- 动作一:求八种维度组合情况
step1:梳理表关系(指标和DWS一致,需要与dwd.dim_date 时间维表相关联)
-- 整个表和DWS层销售主题统计宽表dws_sale_daycount的区别就在于多了开头的时间粒度字段
-- 改造1,dim_date日期维度表字段太多,抽取出要用的字段,放到CTE表达式中, 最后join.
with dd as (
select
dim_date_id,
date_code,
date_id_mom, -- 与本月环比的上月日期
date_id_mym -- 与本月同比的上年日期
year_code,
month_code,
year_month, --年月
day_month_num, --几号
week_day_code, --周几
year_week_name_cn, --年周
from yp_dwd.dim_date
)
select
'2023-06-04' as date_time,
-- step5和日期相关的所有维度(year、month、week、date(就是天day))
case
when grouping(year_code,month_code,day_month_num,dim_date_id)=0 then 'date' -- 天
when grouping(year_code,year_week_name_cn)=0 then 'week'
when grouping(year_code,month_code,year_month)=0 then 'month'
else 'year'
end as time_type
dt,
year_code,
year_month,
month_code,
day_of_month,
dim_date_id,
year_week_naem_cnt,
-- 这里使用取巧版,找不同
case
when grouping(dim_date_id)=0 then 'store'

-- step4,完成除了日期维度外,其他维度的计算
-- 通过case when来算出分组类型 group_type
case grouping(city_id,tread_area_id,store_id,brand_id,min_class_id,mid_class_id,max_class_id)
-- 这里的grouping=0代表有,grouping为1代表没有.
when 63 then '城市'
when 31 then '城市+商圈'
when 15 then '城市+商圈+店铺'
when 32+64+128+4+2+1 then '品牌'

-- 14个维度字段(城市,商圈,店铺,品牌,大类,中类,小类)
city_id,city_name,
tread_area_id,trade_area_name,
store_id,store_name,
brand_id,brand_name,
max_class_id,max_class_name,
mid_class_id,mid_class_name,
min_class_id,min_class_name

-- 完成16个指标计算
sum(dc.sale_amt) as sale_amt, -- 销售金额
sum(dc.plat_amt) as plat_amt, -- 平台分润
sum(dc.deliver_sale_amout) as deliver_sale_amt -- 配送费
......
from yp_dws.dws_sale_daycount dc -- 销售主题日统计宽表
left join yp_dwd.dim_date dd on dd.dt=dc.date_code; -- 日期维度表
-- 与dim_data关联的原因是获取时间字段,比如需要年中的第几周,月中的第几天这种字段
group by
-- step2 年的八种维度,同理可以写出 月 周 日 的八种维度,总共32种维度
grouping sets(
-- 年
(dd.year_code), -- 按照年来分组,然后union all
(dd.year_code, city_id, city_name), -- 按照年和城市来分组,然后union all
(dd.year_code, city_id, city_name, trade_area_id, trade_area_name),
(dd.year_code, city_id, city_name, trade_area_id, trade_area_name, store_id, store_name),
(dd.year_code, brand_id, brand_name),
(dd.year_code, max_class_id, max_class_name),
(dd.year_code, max_class_id, max_class_name,mid_class_id, mid_class_name),
(dd.year_code, max_class_id, max_class_name,mid_class_id, mid_class_name,min_class_id, min_class_name),
-- 月
(dd.year_code,month_code), -- 按照月来分组,然后union all
(dd.year_code,month_code, city_id, city_name), -- 按照年和城市来分组,然后union all
(dd.year_code,month_code, city_id, city_name, trade_area_id, trade_area_name),
(dd.year_code,month_code, city_id, city_name, trade_area_id, trade_area_name, store_id, store_name),
(dd.year_code,month_code, brand_id, brand_name),
(dd.year_code,month_code, max_class_id, max_class_name),
(dd.year_code,month_code, max_class_id, max_class_name,mid_class_id, mid_class_name),
(dd.year_code,month_code, max_class_id, max_class_name,mid_class_id, mid_class_name,min_class_id, min_class_name)
-- 这里周和日就不写了....
);
-- 改造
1. yp_dwd.dim_date日期维度表的字段太多了, 我们只需要抽取出我们要用的字段, 然后放到CTE表达式中, 最后和 dws层的销售主题日统计宽表 做 连接查询.
2. 改造grouping sets()的代码, 加上: 月(8种维度), 周(8种维度), 日(8种维度) 结合已经实现的 年(8种维度), 共计 32 种维度组合.
3. 完善和日期维度有关的字段.
4. 目前我们判断日期类型(time_type) 和 维度类型(group_type)都是使用 简单版思路(找不同), 筛掉哪些具有独立属性的, 最终能甄别出每一种维度组合.
其实这个代码可以用 grouping(维度1, 维度2, 维度3....) = 二进制对应的十进制数据 来实现精准校验.

Create Table

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63

CREATE TABLE yp_dm.dm_sale(
date_time string COMMENT '统计日期,不能用来分组统计',
time_type string COMMENT '统计时间维度:year、month、week、date(就是天day)',
year_code string COMMENT '年code',
year_month string COMMENT '年月',
month_code string COMMENT '月份编码',
day_month_num string COMMENT '一月第几天',
dim_date_id string COMMENT '日期',
year_week_name_cn string COMMENT '年中第几周',

group_type string COMMENT '分组类型:store,trade_area,city,brand,min_class,mid_class,max_class,all',
city_id string COMMENT '城市id',
city_name string COMMENT '城市name',
trade_area_id string COMMENT '商圈id',
trade_area_name string COMMENT '商圈名称',
store_id string COMMENT '店铺的id',
store_name string COMMENT '店铺名称',
brand_id string COMMENT '品牌id',
brand_name string COMMENT '品牌名称',
max_class_id string COMMENT '商品大类id',
max_class_name string COMMENT '大类名称',
mid_class_id string COMMENT '中类id',
mid_class_name string COMMENT '中类名称',
min_class_id string COMMENT '小类id',
min_class_name string COMMENT '小类名称',
-- =======统计=======
-- 销售收入
sale_amt DECIMAL(38,2) COMMENT '销售收入',
-- 平台收入
plat_amt DECIMAL(38,2) COMMENT '平台收入',
-- 配送成交额
deliver_sale_amt DECIMAL(38,2) COMMENT '配送成交额',
-- 小程序成交额
mini_app_sale_amt DECIMAL(38,2) COMMENT '小程序成交额',
-- 安卓APP成交额
android_sale_amt DECIMAL(38,2) COMMENT '安卓APP成交额',
-- 苹果APP成交额
ios_sale_amt DECIMAL(38,2) COMMENT '苹果APP成交额',
-- PC商城成交额
pcweb_sale_amt DECIMAL(38,2) COMMENT 'PC商城成交额',
-- 成交单量
order_cnt BIGINT COMMENT '成交单量',
-- 参评单量
eva_order_cnt BIGINT COMMENT '参评单量comment=>cmt',
-- 差评单量
bad_eva_order_cnt BIGINT COMMENT '差评单量negtive-comment=>ncmt',
-- 配送成交单量
deliver_order_cnt BIGINT COMMENT '配送单量',
-- 退款单量
refund_order_cnt BIGINT COMMENT '退款单量',
-- 小程序成交单量
miniapp_order_cnt BIGINT COMMENT '小程序成交单量',
-- 安卓APP订单量
android_order_cnt BIGINT COMMENT '安卓APP订单量',
-- 苹果APP订单量
ios_order_cnt BIGINT COMMENT '苹果APP订单量',
-- PC商城成交单量
pcweb_order_cnt BIGINT COMMENT 'PC商城成交单量'
)
COMMENT '销售主题宽表'
ROW format delimited fields terminated BY '\t'
stored AS orc tblproperties ('orc.compress' = 'SNAPPY');

All_Code_Details

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
insert into hive.yp_dm.dm_sale
-- 获取日期数据(周、月的环比/同比日期)
with dt1 as (
select
dim_date_id,
date_code
,date_id_mom -- 与本月环比的上月日期
,date_id_mym -- 与本月同比的上年日期
,year_code
,month_code
,year_month --年月
,day_month_num --几号
,week_day_code --周几
,year_week_name_cn --年周
from yp_dwd.dim_date
)
-- step1: 梳理表关系, 销售主题日统计宽表 left join yp_dwd.dim_date 日期维度表
select
-- 改造3: 完善和日期维度有关的字段.
-- step5: 日期维度字段
'2023-06-04' as date_time,
-- time_type字段具体的值: string comment '统计时间维度:year、month、week、date(就是天day)',
case
when grouping(year_code, month_code, day_month_num, dim_date_id)=0 then 'date'
when grouping(year_code, year_week_name_cn)=0 then 'week'
when grouping(year_code, month_code, year_month)=0 then 'month'
else 'year'
end as time_type,
dt1.year_code, -- 年, 例如: 2023
dt1.year_month, -- 年月, 例如: 202303
dt1.month_code, -- 月, 例如: 03
dt1.day_month_num, -- 月中的第几天, 例如: 21
dt1.dim_date_id, -- 具体的日期, 例如: 20230321
dt1.year_week_name_cn, -- 年中的第几周
-- step4: 除(日期维度)外, 其它所有的维度, 即: 城市, 商圈, 店铺, 品牌, 大类, 中类, 小类.
-- 1个分组类型字段, 分组类型:store,trade_area,city,brand,min_class,mid_class,max_class,all
case
when grouping(store_id, store_name)=0 then 'store' -- 日期 + 城市 + 商圈 + 店铺
when grouping(trade_area_id, trade_area_name)=0 then 'trade_area' -- 日期 + 城市 + 商圈
when grouping(city_id, city_name)=0 then 'city' -- 日期 + 城市
when grouping(brand_id, brand_name)=0 then 'brand' -- 日期 + 品牌
when grouping(min_class_id, min_class_name)=0 then 'min_class' -- 日期 + 大类 + 中类 + 小类
when grouping(mid_class_id, mid_class_name)=0 then 'mid_class' -- 日期 + 大类 + 中类
when grouping(max_class_id, max_class_name)=0 then 'max_class' -- 日期 + 大类
else 'all' -- 日期
end as group_type,
-- 14个维度字段
city_id,
city_name,
trade_area_id,
trade_area_name,
store_id,
store_name,
brand_id,
brand_name,
max_class_id,
max_class_name,
mid_class_id,
mid_class_name,
min_class_id,
min_class_name,
-- step3: 计算 16项 指标
sum(sale_amt) as sale_amt,
sum(plat_amt) as plat_amt,
sum(deliver_sale_amt) as deliver_sale_amt,
sum(mini_app_sale_amt) as mini_app_sale_amt,
sum(android_sale_amt) as android_sale_amt,
sum(ios_sale_amt) as ios_sale_amt,
sum(pcweb_sale_amt) as pcweb_sale_amt,
sum(order_cnt) as order_cnt,
sum(eva_order_cnt) as eva_order_cnt,
sum(bad_eva_order_cnt) as bad_eva_order_cnt,
sum(deliver_order_cnt) as deliver_order_cnt,
sum(refund_order_cnt) as refund_order_cnt,
sum(miniapp_order_cnt) as miniapp_order_cnt,
sum(android_order_cnt) as android_order_cnt,
sum(ios_order_cnt) as ios_order_cnt,
sum(pcweb_order_cnt) as pcweb_order_cnt
from yp_dws.dws_sale_daycount dc -- dws层的 销售主题日统计宽表
left join dt1 on dc.dt = dt1.date_code -- dwd层的 日期维度表
-- step2: 按年进行分组, 结合8种维度, 进行统计.
group by
grouping sets(
-- 改造2: grouping sets()的代码, 加上: 月(8种维度), 周(8种维度), 日(8种维度) 结合已经实现的 年(8种维度), 共计 32 种维度组合.
-- 按年统计, 8种维度.
(dt1.year_code), -- 日期( 年 )
(dt1.year_code, city_id, city_name), -- 日期( 年 ) + 城市
(dt1.year_code, city_id, city_name, trade_area_id, trade_area_name), -- 日期( 年 ) + 城市 + 商圈
(dt1.year_code, city_id, city_name, trade_area_id, trade_area_name, store_id, store_name), -- 日期( 年 ) + 城市 + 商圈 + 店铺
(dt1.year_code, brand_id, brand_name), -- 日期( 年 ) + 品牌
(dt1.year_code, max_class_id, max_class_name), -- 日期( 年 ) + 大类
(dt1.year_code, max_class_id, max_class_name, mid_class_id, mid_class_name), -- 日期( 年 ) + 大类 + 中类
(dt1.year_code, max_class_id, max_class_name, mid_class_id, mid_class_name, min_class_id, min_class_name), -- 日期( 年 ) + 大类 + 中类 + 小类

-- 按月统计, 8种维度.
(dt1.year_code, dt1.month_code, dt1.year_month), -- 日期( 月 )
(dt1.year_code, dt1.month_code, dt1.year_month, city_id, city_name), -- 日期( 月 ) + 城市
(dt1.year_code, dt1.month_code, dt1.year_month, city_id, city_name, trade_area_id, trade_area_name), -- 日期( 月 ) + 城市 + 商圈
(dt1.year_code, dt1.month_code, dt1.year_month, city_id, city_name, trade_area_id, trade_area_name, store_id, store_name), -- 日期( 月 ) + 城市 + 商圈 + 店铺
(dt1.year_code, dt1.month_code, dt1.year_month, brand_id, brand_name), -- 日期( 月 ) + 品牌
(dt1.year_code, dt1.month_code, dt1.year_month, max_class_id, max_class_name), -- 日期( 月 ) + 大类
(dt1.year_code, dt1.month_code, dt1.year_month, max_class_id, max_class_name, mid_class_id, mid_class_name), -- 日期( 月 ) + 大类 + 中类
(dt1.year_code, dt1.month_code, dt1.year_month, max_class_id, max_class_name, mid_class_id, mid_class_name, min_class_id, min_class_name), -- 日期( 年 ) + 大类 + 中类 + 小类

-- 按周统计, 8种维度.
(dt1.year_code, dt1.year_week_name_cn), -- 日期( 周 )
(dt1.year_code, dt1.year_week_name_cn, city_id, city_name), -- 日期( 周 ) + 城市
(dt1.year_code, dt1.year_week_name_cn, city_id, city_name, trade_area_id, trade_area_name), -- 日期( 周 ) + 城市 + 商圈
(dt1.year_code, dt1.year_week_name_cn, city_id, city_name, trade_area_id, trade_area_name, store_id, store_name), -- 日期( 周 ) + 城市 + 商圈 + 店铺
(dt1.year_code, dt1.year_week_name_cn, brand_id, brand_name), -- 日期( 周 ) + 品牌
(dt1.year_code, dt1.year_week_name_cn, max_class_id, max_class_name), -- 日期( 周 ) + 大类
(dt1.year_code, dt1.year_week_name_cn, max_class_id, max_class_name, mid_class_id, mid_class_name), -- 日期( 周 ) + 大类 + 中类
(dt1.year_code, dt1.year_week_name_cn, max_class_id, max_class_name, mid_class_id, mid_class_name, min_class_id, min_class_name), -- 日期( 周 ) + 大类 + 中类 + 小类

-- 按天统计, 8种维度.
(dt1.year_code, dt1.month_code, dt1.day_month_num, dt1.dim_date_id), -- 日期( 天 )
(dt1.year_code, dt1.month_code, dt1.day_month_num, dt1.dim_date_id, city_id, city_name), -- 日期( 天 ) + 城市
(dt1.year_code, dt1.month_code, dt1.day_month_num, dt1.dim_date_id, city_id, city_name, trade_area_id, trade_area_name), -- 日期( 天 ) + 城市 + 商圈
(dt1.year_code, dt1.month_code, dt1.day_month_num, dt1.dim_date_id, city_id, city_name, trade_area_id, trade_area_name, store_id, store_name), -- 日期( 天 ) + 城市 + 商圈 + 店铺
(dt1.year_code, dt1.month_code, dt1.day_month_num, dt1.dim_date_id, brand_id, brand_name), -- 日期( 天 ) + 品牌
(dt1.year_code, dt1.month_code, dt1.day_month_num, dt1.dim_date_id, max_class_id, max_class_name), -- 日期( 天 ) + 大类
(dt1.year_code, dt1.month_code, dt1.day_month_num, dt1.dim_date_id, max_class_id, max_class_name, mid_class_id, mid_class_name), -- 日期( 天 ) + 大类 + 中类
(dt1.year_code, dt1.month_code, dt1.day_month_num, dt1.dim_date_id, max_class_id, max_class_name, mid_class_id, mid_class_name, min_class_id, min_class_name) -- 日期( 天 ) + 大类 + 中类 + 小类
);

商品主题统计宽表

Create Table

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
create table yp_dm.dm_sku(
dt string,
sku_id string comment 'sku_id',

last_30d_order_count bigint comment '被下单次数',
last_30d_order_num bigint comment '被下单件数',
last_30d_order_amount decimal(38, 2) comment '被下单金额',

order_count bigint comment '被下单次数 总累计',
order_num bigint comment '被下单件数 总累计',
order_amount decimal(38, 2) comment '被下单金额 总累计',


last_30d_payment_count bigint comment '被支付次数',
last_30d_payment_num bigint comment '被支付件数',
last_30d_payment_amount decimal(38, 2) comment '被支付金额',

payment_count bigint comment '被支付次数',
payment_num bigint comment '被支付件数',
payment_amount decimal(38, 2) comment '被支付金额',
refund_count bigint comment '被退款次数',
refund_num bigint comment '被退款件数',
refund_amount decimal(38, 2) comment '被退款金额',
cart_count bigint comment '被加入购物车次数',
cart_num bigint comment '被加入购物车件数',
favor_count bigint comment '被收藏次数',
evaluation_good_count bigint comment '好评数',
evaluation_mid_count bigint comment '中评数',
evaluation_bad_count bigint comment '差评数'
) comment '商品主题宽表'
row format delimited fields terminated by '\t'
stored as orc tblproperties ('orc.compress'='snappy');

手撕代码(Pseudocode)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
-- 完成DM层-商品主题统计宽表(总累计, 近30天累计)
-- 第1阶段: 首次计算 总累计(从开始时间 ~ 当前时间, 按照sku_id分组, sum累加即可)
-- step1: 创建 yp_dm.dm_sku 表, Hive中完成.
create table yp_dm.dm_sku(
-- 商品id
sku_id string,
-- 各种指标, 总累计, 近30天累计
) comment '商品主题统计宽表'
row format delimited fields terminated by '\t'
stored as orc tblproperties('orc.compress'='snappy');

-- step2: 首次计算 总累计 和 近30天累计.从表yp_dws.dws_sku_daycount中抽取字段
insert into hive.yp_dm.dm_sku -- 将抽取的字段插入到dm_sku表中
with all_count as (
select
sku_id,
sum(order_count) as order_count, -- 下单次数(求累计
sum(order_num) as order_num, -- 下单件数
sum(order_amt) as order_amt, -- 下单金额
.......
from yp_dws.dws_sku_daycount
group by sku_id -- 根据商品id分类
),
last_30d as (
select
sku_id,
sum(order_count) as order_count, -- 下单件数(近30天累计
sum(order_num) as order_num,
sum(order_amt) as order_amt,
......
from yp_dws.dws_sku_daycount
-- 我以为的date_add 函数是date_add('2020-05-08',数字)
where dt >= cast(date_add('day', -30, date '2020-05-08') as varchar)
-- 这里使用类型转换cast函数,将日期类型转换为varchar类型
)
-- 把上述结果进行汇总,
select
ac.sku_id,
ac.order_count,
ac.order_num,
ac.order_amt,

l30.order_last_30d_count,
l30.order_last_30d_num,
l30.order_last_30d_amount
from all_count ac left join last_30d l30 on ac.sku_id=l30.sku_id;

-- 循环计算 总累计 近30天累计(旧的总累计 + 新增一天的数据
-- 循环计算 近30天累计 按照sku_id分组,sum累加即可
with old as (
select * from yp_dm.dm_sku -- 旧的总累计
),
new as (
select
sku_id, -- 商品id
-- 计算最新一天累计
sum(if(dt='2020-05-09',order_count, 0)) as order_count_1d,
sum(if(dt='2020-05-09',order_num,0)) as order_num_1d,
sum(if(dt='2020-05-09',order_amt,0)) as order_amt_1d,
-- 计算 近30天累计
sum(order_count) as order_count_30,
sum(order_num) as order_num_30,
sum(order_amt) as order_amt_30
from
yp_dws.dws_sku_daycount
where dt >= cast(date_add('day', -30, date '2020-50-09') as varchar)
group by sku_id
)
-- 基于旧的总累计 和 new(近30天累计,最新一天的数据) 计算最终结果
select
coalesce(old.sku_id, new.sku_id) as sku_id,
-- 最新的30天累计
coalesce(new.order_count30, 0) as order_count,
coalesce(new.order_num_30, 0) as order_num_30,
coalesce(new.order_amt_30, 0) as order_amt,
-- 新的总累计 = 旧的总累计 + 最新一天的数据
coalesce(old.order_count,0) + coalesce(new.order_count_1d, 0) as order_count,
coalesce(old.order_num, 0) + coalesce(new.order_num_1d, 0) as order_num,
coalesce(old.order_amt, 0) + coalesce(new.order_amt_1d, 0) as order_amt
from old full outer join new on old.sku_id = new.sku_id;

-- 创建临时表,用于存储循环计算的结果
create table yp_dm.dm_sku_tmp as select * from yp_dm.dm_sku; -- ctas 建表语句

-- 删除旧的累计数据.
delete from yp_dm.dm_sku;

-- 用临时表(最新数据) 覆盖 yp_dm.dm_sku;
insert into yp_dm.dm_sku select * from yp_dm.dm_sku_tmp;

-- 查询最终结果.
select * from yp_dm.dm_sku;

All_Code_Details

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
-- 首次执行,需要计算总累计值
insert into yp_dm.dm_sku
with all_count as (
select
sum(order_count) as order_count,
sum(order_num) as order_num,
sum(order_amount) as order_amount,
sum(payment_count) payment_count,
sum(payment_num) payment_num,
sum(payment_amount) payment_amount,
sum(refund_count) refund_count,
sum(refund_num) refund_num,
sum(refund_amount) refund_amount,
sum(cart_count) cart_count,
sum(cart_num) cart_num,
sum(favor_count) favor_count,
sum(evaluation_good_count) evaluation_good_count,
sum(evaluation_mid_count) evaluation_mid_count,
sum(evaluation_bad_count) evaluation_bad_count,
sku_id
from yp_dws.dws_sku_daycount
-- where order_count > 0
group by sku_id
),
last_30d as (
select
sum(order_count) order_last_30d_count,
sum(order_num) order_last_30d_num,
sum(order_amount) as order_last_30d_amount,

sum(payment_count) payment_last_30d_count,
sum(payment_num) payment_last_30d_num,
sum(payment_amount) payment_last_30d_amount,

sum(refund_count) refund_last_30d_count,
sum(refund_num) refund_last_30d_num,
sum(refund_amount) refund_last_30d_amount,

sum(cart_count) cart_last_30d_count,
sum(cart_num) cart_last_30d_num,

sum(favor_count) favor_last_30d_count,

sum(evaluation_good_count) evaluation_last_30d_good_count,
sum(evaluation_mid_count) evaluation_last_30d_mid_count,
sum(evaluation_bad_count) evaluation_last_30d_bad_count,

sku_id
from yp_dws.dws_sku_daycount
where dt>=cast(date_add('day', -30, date '2019-05-07') as varchar)
group by sku_id
)
select
ac.sku_id,
l30.order_last_30d_count,
l30.order_last_30d_num,
l30.order_last_30d_amount,
ac.order_count,
ac.order_num,
ac.order_amount,
l30.payment_last_30d_count,
l30.payment_last_30d_num,
l30.payment_last_30d_amount,
ac.payment_count,
ac.payment_num,
ac.payment_amount,
l30.refund_last_30d_count,
l30.refund_last_30d_num,
l30.refund_last_30d_amount,
ac.refund_count,
ac.refund_num,
ac.refund_amount,
l30.cart_last_30d_count,
l30.cart_last_30d_num,
ac.cart_count,
ac.cart_num,
l30.favor_last_30d_count,
ac.favor_count,
l30.evaluation_last_30d_good_count,
l30.evaluation_last_30d_mid_count,
l30.evaluation_last_30d_bad_count,
ac.evaluation_good_count,
ac.evaluation_mid_count,
ac.evaluation_bad_count
from all_count ac
left join last_30d l30 on ac.sku_id=l30.sku_id;


--每日循环执行
--1.重建临时表
drop table if exists yp_dm.dm_sku_tmp;
create table yp_dm.dm_sku_tmp
(

sku_id string comment 'sku_id',
order_last_30d_count bigint comment '最近30日被下单次数',
order_last_30d_num bigint comment '最近30日被下单件数',
order_last_30d_amount decimal(38,2) comment '最近30日被下单金额',
order_count bigint comment '累积被下单次数',
order_num bigint comment '累积被下单件数',
order_amount decimal(38,2) comment '累积被下单金额',
payment_last_30d_count bigint comment '最近30日被支付次数',
payment_last_30d_num bigint comment '最近30日被支付件数',
payment_last_30d_amount decimal(38,2) comment '最近30日被支付金额',
payment_count bigint comment '累积被支付次数',
payment_num bigint comment '累积被支付件数',
payment_amount decimal(38,2) comment '累积被支付金额',
refund_last_30d_count bigint comment '最近三十日退款次数',
refund_last_30d_num bigint comment '最近三十日退款件数',
refund_last_30d_amount decimal(38,2) comment '最近三十日退款金额',
refund_count bigint comment '累积退款次数',
refund_num bigint comment '累积退款件数',
refund_amount decimal(38,2) comment '累积退款金额',
cart_last_30d_count bigint comment '最近30日被加入购物车次数',
cart_last_30d_num bigint comment '最近30日被加入购物车件数',
cart_count bigint comment '累积被加入购物车次数',
cart_num bigint comment '累积被加入购物车件数',
favor_last_30d_count bigint comment '最近30日被收藏次数',
favor_count bigint comment '累积被收藏次数',
evaluation_last_30d_good_count bigint comment '最近30日好评数',
evaluation_last_30d_mid_count bigint comment '最近30日中评数',
evaluation_last_30d_bad_count bigint comment '最近30日差评数',
evaluation_good_count bigint comment '累积好评数',
evaluation_mid_count bigint comment '累积中评数',
evaluation_bad_count bigint comment '累积差评数'
)
COMMENT '商品主题宽表'
ROW format delimited fields terminated BY '\t'
stored AS orc tblproperties ('orc.compress' = 'SNAPPY');

--2.合并新旧数据
insert into yp_dm.dm_sku_tmp
select
coalesce(new.sku_id,old.sku_id) sku_id,
-- 订单 30天数据
coalesce(new.order_count30,0) order_last_30d_count,
coalesce(new.order_num30,0) order_last_30d_num,
coalesce(new.order_amount30,0) order_last_30d_amount,
-- 订单 累积历史数据
coalesce(old.order_count,0) + coalesce(new.order_count,0) order_count,
coalesce(old.order_num,0) + coalesce(new.order_num,0) order_num,
coalesce(old.order_amount,0) + coalesce(new.order_amount,0) order_amount,
-- 支付单 30天数据
coalesce(new.payment_count30,0) payment_last_30d_count,
coalesce(new.payment_num30,0) payment_last_30d_num,
coalesce(new.payment_amount30,0) payment_last_30d_amount,
-- 支付单 累积历史数据
coalesce(old.payment_count,0) + coalesce(new.payment_count,0) payment_count,
coalesce(old.payment_num,0) + coalesce(new.payment_count,0) payment_num,
coalesce(old.payment_amount,0) + coalesce(new.payment_count,0) payment_amount,
-- 退款单 30天数据
coalesce(new.refund_count30,0) refund_last_30d_count,
coalesce(new.refund_num30,0) refund_last_30d_num,
coalesce(new.refund_amount30,0) refund_last_30d_amount,
-- 退款单 累积历史数据
coalesce(old.refund_count,0) + coalesce(new.refund_count,0) refund_count,
coalesce(old.refund_num,0) + coalesce(new.refund_num,0) refund_num,
coalesce(old.refund_amount,0) + coalesce(new.refund_amount,0) refund_amount,
-- 购物车 30天数据
coalesce(new.cart_count30,0) cart_last_30d_count,
coalesce(new.cart_num30,0) cart_last_30d_num,
-- 购物车 累积历史数据
coalesce(old.cart_count,0) + coalesce(new.cart_count,0) cart_count,
coalesce(old.cart_num,0) + coalesce(new.cart_num,0) cart_num,
-- 收藏 30天数据
coalesce(new.favor_count30,0) favor_last_30d_count,
-- 收藏 累积历史数据
coalesce(old.favor_count,0) + coalesce(new.favor_count,0) favor_count,
-- 评论 30天数据
coalesce(new.evaluation_good_count30,0) evaluation_last_30d_good_count,
coalesce(new.evaluation_mid_count30,0) evaluation_last_30d_mid_count,
coalesce(new.evaluation_bad_count30,0) evaluation_last_30d_bad_count,
-- 评论 累积历史数据
coalesce(old.evaluation_good_count,0) + coalesce(new.evaluation_good_count,0) evaluation_good_count,
coalesce(old.evaluation_mid_count,0) + coalesce(new.evaluation_mid_count,0) evaluation_mid_count,
coalesce(old.evaluation_bad_count,0) + coalesce(new.evaluation_bad_count,0) evaluation_bad_count
from
(
-- dm旧数据
select
sku_id,
order_last_30d_count,
order_last_30d_num,
order_last_30d_amount,
order_count,
order_num,
order_amount ,
payment_last_30d_count,
payment_last_30d_num,
payment_last_30d_amount,
payment_count,
payment_num,
payment_amount,
refund_last_30d_count,
refund_last_30d_num,
refund_last_30d_amount,
refund_count,
refund_num,
refund_amount,
cart_last_30d_count,
cart_last_30d_num,
cart_count,
cart_num,
favor_last_30d_count,
favor_count,
evaluation_last_30d_good_count,
evaluation_last_30d_mid_count,
evaluation_last_30d_bad_count,
evaluation_good_count,
evaluation_mid_count,
evaluation_bad_count
from yp_dm.dm_sku
)old
full outer join
(
-- 30天 和 昨天 的dws新数据
select
sku_id,
sum(if(dt='2019-05-07', order_count,0 )) order_count,
sum(if(dt='2019-05-07',order_num ,0 )) order_num,
sum(if(dt='2019-05-07',order_amount,0 )) order_amount ,
sum(if(dt='2019-05-07',payment_count,0 )) payment_count,
sum(if(dt='2019-05-07',payment_num,0 )) payment_num,
sum(if(dt='2019-05-07',payment_amount,0 )) payment_amount,
sum(if(dt='2019-05-07',refund_count,0 )) refund_count,
sum(if(dt='2019-05-07',refund_num,0 )) refund_num,
sum(if(dt='2019-05-07',refund_amount,0 )) refund_amount,
sum(if(dt='2019-05-07',cart_count,0 )) cart_count,
sum(if(dt='2019-05-07',cart_num,0 )) cart_num,
sum(if(dt='2019-05-07',favor_count,0 )) favor_count,
sum(if(dt='2019-05-07',evaluation_good_count,0 )) evaluation_good_count,
sum(if(dt='2019-05-07',evaluation_mid_count,0 ) ) evaluation_mid_count ,
sum(if(dt='2019-05-07',evaluation_bad_count,0 )) evaluation_bad_count,
sum(order_count) order_count30 ,
sum(order_num) order_num30,
sum(order_amount) order_amount30,
sum(payment_count) payment_count30,
sum(payment_num) payment_num30,
sum(payment_amount) payment_amount30,
sum(refund_count) refund_count30,
sum(refund_num) refund_num30,
sum(refund_amount) refund_amount30,
sum(cart_count) cart_count30,
sum(cart_num) cart_num30,
sum(favor_count) favor_count30,
sum(evaluation_good_count) evaluation_good_count30,
sum(evaluation_mid_count) evaluation_mid_count30,
sum(evaluation_bad_count) evaluation_bad_count30
from yp_dws.dws_sku_daycount
where dt >= cast(date_add('day', -30, date '2019-05-07') as varchar)
group by sku_id
)new
on new.sku_id = old.sku_id;


--3.临时表覆盖宽表
delete from yp_dm.dm_sku;
insert into yp_dm.dm_sku
select * from yp_dm.dm_sku_tmp;