with a1 as ( select*, row_number() over (partitionby cookieid orderby pv) as row4 -- 根据cookieid分组,按照pv进行排序 from website_pv_info ) select*from a1 where row4<=4;
1 2 3 4 5
with a1 as ( select *, ntile(3) over (partitionby cookieid orderby pv) as nt from website_pv_info ) select*from a1 where nt=1;
-- 需求: 根据cookieID进行分组, 获取每组点击量最高的前4名数据, 这个就是经典的案例: 分组求TopN -- Step1: 根据cookieID进行分组, 根据点击量进行排名. select *, dense_rank() over (partitionby cookieid orderby pv desc) drk from website_pv_info where drk <=4; -- 报错. -- 细节: where只能筛选表中已经有的列(数据) -- Step2: 把上述的查询结果当做一张表, 然后从中获取我们要的数据即可. with t1 as ( select *, dense_rank() over (partitionby cookieid orderby pv desc) drk from website_pv_info ) select*from t1 where drk <=4;
-- ntile(数字,表示分成几份) 采用均分策略, 每份之间的差值不超过1, 优先参考最小的那个部分, 即: 7分成3份, 则是: 3, 2, 2 select *, ntile(3) over (partitionby cookieid orderby pv desc) nt from website_pv_info;
-- 需求: 按照cookieid分组, 按照点击量降序排列, 只要每组前三分之一的数据. with t1 as ( select *, ntile(3) over (partitionby cookieid orderby pv desc) nt from website_pv_info ) select*from t1 where nt =1;