-- Missing-value check: count the NULLs in each column of the raw table.
-- In MySQL `col is null` evaluates to 1 or 0, so sum() over it is the number
-- of rows where that column is NULL.
select
    sum(user_id is null),
    sum(item_id is null),
    sum(item_category is null),
    sum(behavior_type is null),
    sum(user_geohash is null),
    sum(times is null),
    sum(amount is null)
from userbehavior;
-- Outlier check: inspect the extreme values of the time and amount columns.
select
    min(times),
    max(times),
    min(amount),
    max(amount)
from userbehavior;
-- 2. Behavior conversion analysis
--    conversion rate = distinct users at this step / distinct users at the previous step
-- Steps are ordered pv (1) -> fav (2) -> cart (3) -> anything else (4).
-- The first step has no predecessor, so lag() is NULL there and ifnull() reports 1.
with step_users as (
    select
        behavior_type,
        case behavior_type
            when 'pv'   then 1
            when 'fav'  then 2
            when 'cart' then 3
            else 4
        end as step_no,
        count(distinct user_id) as 用户人数
    from userbehavior_new
    group by behavior_type
)
select
    behavior_type,
    用户人数,
    lag(用户人数, 1) over w_steps as 上一行为用户人数,
    ifnull(用户人数 / lag(用户人数, 1) over w_steps, 1) as 转化率
from step_users
window w_steps as (order by step_no)
-- Explicit ordering: without it the row order of the result is not guaranteed.
order by step_no;
-- Same funnel restricted to the pv -> cart -> buy path ('fav' rows are excluded),
-- so each step is compared with the previous step that remains after the filter.
-- The first step has no predecessor, so lag() is NULL there and ifnull() reports 1.
with step_users as (
    select
        behavior_type,
        case behavior_type
            when 'pv'   then 1
            when 'fav'  then 2
            when 'cart' then 3
            else 4
        end as step_no,
        count(distinct user_id) as 用户人数
    from userbehavior_new
    where behavior_type in ('pv', 'cart', 'buy')
    group by behavior_type
)
select
    behavior_type,
    用户人数,
    lag(用户人数, 1) over w_steps as 上一行为用户人数,
    ifnull(用户人数 / lag(用户人数, 1) over w_steps, 1) as 转化率
from step_users
window w_steps as (order by step_no)
-- Explicit ordering: without it the row order of the result is not guaranteed.
order by step_no;
-- Daily view -> cart -> buy conversion rates.
-- Step 1 (CTE): distinct users per day and behavior type.
-- Step 2: pivot the three behavior types into columns and divide adjacent steps.
with daily_step_users as (
    select
        日期,
        behavior_type,
        count(distinct user_id) as 用户人数
    from userbehavior_new
    where behavior_type in ('pv', 'cart', 'buy')
    group by
        日期,
        behavior_type
)
select
    日期,
    sum(case when behavior_type = 'pv'   then 用户人数 else 0 end) as 浏览人数,
    sum(case when behavior_type = 'cart' then 用户人数 else 0 end) as 加购人数,
    sum(case when behavior_type = 'buy'  then 用户人数 else 0 end) as 购买人数,
    sum(case when behavior_type = 'cart' then 用户人数 else 0 end)
        / sum(case when behavior_type = 'pv' then 用户人数 else 0 end) as 浏览_加购转化率,
    sum(case when behavior_type = 'buy' then 用户人数 else 0 end)
        / sum(case when behavior_type = 'cart' then 用户人数 else 0 end) as 加购_购买转化率
from daily_step_users
group by 日期;
-- 3. Product contribution analysis (Pareto)
--    cumulative sales share = running sales total / overall sales total
-- Window functions cannot be used in a HAVING clause (MySQL rejects the query),
-- so the shares are computed in a CTE and filtered in the outer query.
with category_sales as (
    -- Sales amount per item category, purchases only.
    select
        item_category,
        sum(amount) as 销售额
    from userbehavior_new
    where behavior_type = 'buy'
    group by item_category
),
category_share as (
    select
        item_category,
        销售额,
        sum(销售额) over w_running as 累积销售额,
        sum(销售额) over () as 总销售额,
        sum(销售额) over w_running / sum(销售额) over () as 累积销售额百分比
    from category_sales
    -- ROWS frame plus item_category as tiebreaker: the default RANGE frame would
    -- give categories with equal sales the same (inflated) running total.
    window w_running as (
        order by 销售额 desc, item_category
        rows unbounded preceding
    )
)
-- Keep the top categories that together make up at most 80% of total sales.
select
    item_category,
    销售额,
    累积销售额,
    总销售额,
    累积销售额百分比
from category_share
where 累积销售额百分比 <= 0.8
order by
    销售额 desc,
    item_category;
-- 4. User value analysis
-- Per buyer: latest purchase date, days from that date to the fixed reference
-- date 2014-12-19, number of purchase rows, and total purchase amount.
-- NOTE(review): 2014-12-19 is presumably the day after the last date in the data; confirm.
with buyer_totals as (
    select
        user_id,
        max(日期) as 最近一次消费日期,
        count(*) as 消费频次,
        sum(amount) as 消费金额
    from userbehavior_new
    where behavior_type = 'buy'
    group by user_id
)
select
    user_id,
    最近一次消费日期,
    timestampdiff(day, 最近一次消费日期, '2014-12-19') as 消费时间间隔,
    消费频次,
    消费金额
from buyer_totals;
-- RFM scoring: turn each buyer's raw R / F / M values into 1-5 scores.
--   R (days since last purchase): <=6 -> 5, <=12 -> 4, <=18 -> 3, <=24 -> 2, else 1
--   F (purchase rows):            1..4 map to themselves, 5 or more -> 5
--   M (total amount):             <100 -> 1, <200 -> 2, <300 -> 3, <400 -> 4, else 5
with buyer_rfm as (
    select
        user_id,
        timestampdiff(day, max(日期), '2014-12-19') as R,
        count(*) as F,
        sum(amount) as M
    from userbehavior_new
    where behavior_type = 'buy'
    group by user_id
)
select
    user_id,
    R,
    F,
    M,
    case
        when R <= 6  then 5
        when R <= 12 then 4
        when R <= 18 then 3
        when R <= 24 then 2
        else 1
    end as R评分,
    -- F is a per-group count(*), so it is always >= 1; capping at 5 gives the score.
    least(F, 5) as F评分,
    case
        when M < 100 then 1
        when M < 200 then 2
        when M < 300 then 3
        when M < 400 then 4
        else 5
    end as M评分
from buyer_rfm;
-- RFM averages: mean R / F / M score over all buyers.
with buyer_rfm as (
    select
        user_id,
        timestampdiff(day, max(日期), '2014-12-19') as R,
        count(*) as F,
        sum(amount) as M
    from userbehavior_new
    where behavior_type = 'buy'
    group by user_id
),
buyer_scores as (
    select
        user_id,
        case
            when R <= 6  then 5
            when R <= 12 then 4
            when R <= 18 then 3
            when R <= 24 then 2
            else 1
        end as R评分,
        -- F is a per-group count(*), so it is always >= 1; capping at 5 gives the score.
        least(F, 5) as F评分,
        case
            when M < 100 then 1
            when M < 200 then 2
            when M < 300 then 3
            when M < 400 then 4
            else 5
        end as M评分
    from buyer_rfm
)
select
    avg(R评分) as R均值,
    avg(F评分) as F均值,
    avg(M评分) as M均值
from buyer_scores;
-- RFM levels: flag each buyer's R / F / M score as high ('高') when it is above
-- the average score of all buyers, otherwise low ('低').
-- The averages are computed inside the query instead of being pasted in as
-- literals, so the thresholds stay correct when the underlying data changes.
with buyer_rfm as (
    select
        user_id,
        timestampdiff(day, max(日期), '2014-12-19') as R,
        count(*) as F,
        sum(amount) as M
    from userbehavior_new
    where behavior_type = 'buy'
    group by user_id
),
buyer_scores as (
    select
        user_id,
        R,
        F,
        M,
        case
            when R <= 6  then 5
            when R <= 12 then 4
            when R <= 18 then 3
            when R <= 24 then 2
            else 1
        end as R评分,
        -- F is a per-group count(*), so it is always >= 1; capping at 5 gives the score.
        least(F, 5) as F评分,
        case
            when M < 100 then 1
            when M < 200 then 2
            when M < 300 then 3
            when M < 400 then 4
            else 5
        end as M评分
    from buyer_rfm
),
score_avg as (
    -- Single-row CTE holding the three thresholds.
    select
        avg(R评分) as R均值,
        avg(F评分) as F均值,
        avg(M评分) as M均值
    from buyer_scores
)
select
    s.user_id,
    s.R,
    s.F,
    s.M,
    s.R评分,
    s.F评分,
    s.M评分,
    if(s.R评分 > a.R均值, '高', '低') as R程度,
    if(s.F评分 > a.F均值, '高', '低') as F程度,
    if(s.M评分 > a.M均值, '高', '低') as M程度
from buyer_scores as s
cross join score_avg as a;
-- RFM user segments: combine the high/low flags of R, F and M into one of
-- eight segment labels per buyer.
-- The high/low thresholds are the average scores, computed inside the query
-- instead of being pasted in as literals, so they follow the data.
with buyer_rfm as (
    select
        user_id,
        timestampdiff(day, max(日期), '2014-12-19') as R,
        count(*) as F,
        sum(amount) as M
    from userbehavior_new
    where behavior_type = 'buy'
    group by user_id
),
buyer_scores as (
    select
        user_id,
        R,
        F,
        M,
        case
            when R <= 6  then 5
            when R <= 12 then 4
            when R <= 18 then 3
            when R <= 24 then 2
            else 1
        end as R评分,
        -- F is a per-group count(*), so it is always >= 1; capping at 5 gives the score.
        least(F, 5) as F评分,
        case
            when M < 100 then 1
            when M < 200 then 2
            when M < 300 then 3
            when M < 400 then 4
            else 5
        end as M评分
    from buyer_rfm
),
score_avg as (
    -- Single-row CTE holding the three thresholds.
    select
        avg(R评分) as R均值,
        avg(F评分) as F均值,
        avg(M评分) as M均值
    from buyer_scores
),
buyer_levels as (
    select
        s.user_id,
        s.R,
        s.F,
        s.M,
        s.R评分,
        s.F评分,
        s.M评分,
        if(s.R评分 > a.R均值, '高', '低') as R程度,
        if(s.F评分 > a.F均值, '高', '低') as F程度,
        if(s.M评分 > a.M均值, '高', '低') as M程度
    from buyer_scores as s
    cross join score_avg as a
)
select
    user_id,
    R,
    F,
    M,
    R评分,
    F评分,
    M评分,
    R程度,
    F程度,
    M程度,
    case
        when R程度 = '高' and F程度 = '高' and M程度 = '高' then '重要价值用户'
        when R程度 = '高' and F程度 = '低' and M程度 = '高' then '重要发展用户'
        when R程度 = '低' and F程度 = '高' and M程度 = '高' then '重要保持用户'
        when R程度 = '低' and F程度 = '低' and M程度 = '高' then '重要挽留用户'
        when R程度 = '高' and F程度 = '高' and M程度 = '低' then '一般价值用户'
        when R程度 = '高' and F程度 = '低' and M程度 = '低' then '一般发展用户'
        when R程度 = '低' and F程度 = '高' and M程度 = '低' then '一般保持用户'
        -- Remaining combination: R low, F low, M low.
        else '一般挽留用户'
    end as 用户价值分类
from buyer_levels;
-- Field processing (preview only; this statement does not persist anything):
-- remove exact duplicate rows, derive date / hour / weekday from `times`,
-- and leave out user_geohash, which the later analysis does not use.
with deduped as (
    select distinct *
    from userbehavior
)
select
    user_id,
    item_id,
    item_category,
    behavior_type,
    date(times) as 日期,
    hour(times) as 小时,
    -- '%w' is the weekday number, 0 = Sunday .. 6 = Saturday.
    date_format(times, '%w') as 星期,
    amount
from deduped;
-- Save the processed result as the view userbehavior_new.
-- The analysis queries in this file read from this view, so this statement
-- must be executed before them.
-- `create or replace` keeps the script re-runnable: a plain `create view`
-- fails when the view already exists.
create or replace view userbehavior_new as
select
    user_id,
    item_id,
    item_category,
    behavior_type,
    date(times) as 日期,
    hour(times) as 小时,
    -- '%w' is the weekday number, 0 = Sunday .. 6 = Saturday.
    date_format(times, '%w') as 星期,
    amount
from (select distinct * from userbehavior) as t;

-- Quick look at the contents of the view.
select * from userbehavior_new;








-- 暂无数据 (no data yet)