当前位置: 代码迷 >> 综合 >> 数据仓库笔试题-pdd
  详细解决方案

数据仓库笔试题-pdd

热度:41   发布时间:2024-03-06 17:34:36.0

题目:商品活动表 goods_activity,字段:id 主键、goods_id 商品id、act_id 活动id、create_time 活动创建时间、status 上线状态(2 上线,3 下线)。

问题一、每个活动的每个商品 的开始时间和结束时间?

问题二、假设数据中有脏数据:同一个活动可能连续两次上线,也可能连续两次下线。需要把重复的上线状态按时间取第一条,重复的下线状态按时间取最后一条。

-- 第一题:
-- id,goods_id,act_id,create_time,status
-- asdf345,123,bybt123,2020-05-01,2
-- asdf346,123,bybt123,2020-05-10,3
-- asdf347,123,bybt123,2020-06-15,2  
-- asdf348,123,bybt123,2020-06-20,3

-- ->
-- goods_id,act_id,online_dt,offline_dt
-- 123,bybt123,2020-05-01,2020-05-10
-- 123,bybt123,2020-06-15,2020-06-20

-- Question 1: one row per online period of each (goods_id, act_id), giving the
-- time it went online and the time it went offline.
-- Assumes clean data: within a goods_id/act_id partition the status rows
-- strictly alternate 2 (online) -> 3 (offline), so the row that follows an
-- online row in create_time order is its matching offline row.
select
    goods_id,
    act_id,
    online_dt,
    offline_dt
from (
    select
        goods_id,
        act_id,
        status,
        create_time as online_dt,
        -- Time of the next event for the same goods/activity. The default
        -- '9999-12-31' is returned when there is no later row, i.e. the
        -- activity has not been taken offline yet.
        lead(create_time, 1, '9999-12-31') over (
            partition by goods_id, act_id
            order by create_time
        ) as offline_dt
    from goods_activity
) paired
-- Keep only the online rows; each one already carries its offline time.
where status = 2;


-- 第二题:
-- id,goods_id,act_id,create_time,status   lag  lead   (lag/lead = previous/next status in time order, 0 when there is none)
-- asdf341,123,bybt123,2020-05-01,2   0   2 
-- asdf343,123,bybt123,2020-05-02,2   2   3 
-- asdf34q,123,bybt123,2020-05-10,3   2   2 
-- asdf346,123,bybt123,2020-06-15,2   3   3 
-- asdf348,123,bybt123,2020-06-20,3   2   3 
-- asdf350,123,bybt123,2020-06-21,3   3   0 

-- ->
-- goods_id,act_id,online_dt,offline_dt
-- 123,bybt123,2020-05-01,2020-05-10
-- 123,bybt123,2020-06-15,2020-06-21

-- Question 2: same output as question 1, but tolerant of dirty data in which an
-- activity may be put online twice in a row or taken offline twice in a row.
-- Rule: of consecutive online rows keep the FIRST, of consecutive offline rows
-- keep the LAST. After that the rows alternate 2 -> 3 again and can be paired
-- with lead() exactly as in question 1.
select
    goods_id,
    act_id,
    online_dt,
    offline_dt
from (
    -- Step 3: pair every surviving row with the time of the next surviving row.
    select
        goods_id,
        act_id,
        status,
        create_time as online_dt,
        -- '9999-12-31' is returned when no later row exists (still online).
        lead(create_time, 1, '9999-12-31') over (
            partition by goods_id, act_id
            order by create_time
        ) as offline_dt
    from (
        -- Step 2: drop the duplicated status rows.
        select
            goods_id,
            act_id,
            create_time,
            status
        from (
            -- Step 1: attach the previous and next status of each row.
            -- The default 0 is not a valid status, so the first row always
            -- differs from its predecessor and the last row from its successor.
            select
                goods_id,
                act_id,
                create_time,
                status,
                lag(status, 1, 0) over (
                    partition by goods_id, act_id
                    order by create_time
                ) as prev_status,
                lead(status, 1, 0) over (
                    partition by goods_id, act_id
                    order by create_time
                ) as next_status
            from (
                -- Inline sample data standing in for the goods_activity table.
                select 'asdf341' as id, 123 as goods_id, 'bybt123' as act_id, '2020-05-01' as create_time, 2 as status
                union all
                select 'asdf343' as id, 123 as goods_id, 'bybt123' as act_id, '2020-05-02' as create_time, 2 as status
                union all
                select 'asdf34q' as id, 123 as goods_id, 'bybt123' as act_id, '2020-05-10' as create_time, 3 as status
                union all
                select 'asdf346' as id, 123 as goods_id, 'bybt123' as act_id, '2020-06-15' as create_time, 2 as status
                union all
                select 'asdf348' as id, 123 as goods_id, 'bybt123' as act_id, '2020-06-20' as create_time, 3 as status
                union all
                select 'asdf350' as id, 123 as goods_id, 'bybt123' as act_id, '2020-06-21' as create_time, 3 as status
            ) goods_activity
        ) flagged
        where
            -- an online row is kept only if the previous row was not online
            (status = 2 and status <> prev_status)
            -- an offline row is kept only if the next row is not offline
            or (status = 3 and status <> next_status)
    ) deduped
) paired
-- Keep only the online rows; each one already carries its offline time.
where status = 2;