CDA117556

2022-01-21   阅读量: 390

MySQL

上课SQL代码总结

扫码加入数据分析学习群

-- 缺失值处理
-- Missing-value check: count the NULLs in every column of the raw table.
-- In MySQL `col is null` evaluates to 1 or 0, so summing it gives the NULL count.
-- Each result column carries an explicit alias so the output can be read
-- without referring back to the query text.
select
    sum(user_id is null)       as user_id_nulls,
    sum(item_id is null)       as item_id_nulls,
    sum(item_category is null) as item_category_nulls,
    sum(behavior_type is null) as behavior_type_nulls,
    sum(user_geohash is null)  as user_geohash_nulls,
    sum(times is null)         as times_nulls,
    sum(amount is null)        as amount_nulls
from userbehavior;

-- Outlier check: look at the smallest and largest values of the `times`
-- and `amount` columns of the raw table.
select
    min(times),
    max(times),
    min(amount),
    max(amount)
from userbehavior;

-- 2. Behavior conversion analysis
--    (conversion rate = users at the current step / users at the previous step).
-- The steps are ranked pv -> fav -> cart -> everything else by the named
-- window below. The first step has no predecessor, so lag() yields NULL
-- there and ifnull() turns its rate into 1.
select
    behavior_type,
    count(distinct user_id) as 用户人数,
    lag(count(distinct user_id), 1) over funnel as 上一行为用户人数,
    ifnull(
        count(distinct user_id) / lag(count(distinct user_id), 1) over funnel,
        1
    ) as 转化率
from userbehavior_new
group by behavior_type
window funnel as (
    order by
        case behavior_type
            when 'pv'   then 1
            when 'fav'  then 2
            when 'cart' then 3
            else 4
        end
);

-- Same conversion funnel, restricted to the pv -> cart -> buy steps.
-- Because the WHERE clause keeps only these three behaviors, the window
-- ranks them 1/2/3; the relative order (pv, then cart, then buy) is what
-- lag() depends on.
select
    behavior_type,
    count(distinct user_id) as 用户人数,
    lag(count(distinct user_id), 1) over funnel as 上一行为用户人数,
    ifnull(
        count(distinct user_id) / lag(count(distinct user_id), 1) over funnel,
        1
    ) as 转化率
from userbehavior_new
where behavior_type in ('pv', 'cart', 'buy')
group by behavior_type
window funnel as (
    order by
        case behavior_type
            when 'pv'   then 1
            when 'cart' then 2
            else 3
        end
);

-- Daily view -> add-to-cart -> purchase conversion rates.
with daily_users as (
    -- Distinct users per day and behavior (one row per day/behavior pair).
    select
        日期,
        behavior_type,
        count(distinct user_id) as 用户人数
    from userbehavior_new
    where behavior_type in ('pv', 'cart', 'buy')
    group by 日期, behavior_type
)
-- Pivot the three behaviors into columns and divide adjacent steps.
select
    日期,
    sum(case when behavior_type = 'pv'   then 用户人数 else 0 end) as 浏览人数,
    sum(case when behavior_type = 'cart' then 用户人数 else 0 end) as 加购人数,
    sum(case when behavior_type = 'buy'  then 用户人数 else 0 end) as 购买人数,
    sum(case when behavior_type = 'cart' then 用户人数 else 0 end)
        / sum(case when behavior_type = 'pv' then 用户人数 else 0 end) as 浏览_加购转化率,
    sum(case when behavior_type = 'buy' then 用户人数 else 0 end)
        / sum(case when behavior_type = 'cart' then 用户人数 else 0 end) as 加购_购买转化率
from daily_users
group by 日期;

-- 3. Quantitative product-contribution analysis (Pareto analysis)
--    (cumulative sales share = cumulative sales / total sales).
-- Per category: sales of 'buy' rows, running total in descending sales
-- order, grand total, and the running share of the grand total.
-- NOTE: this statement is a counter-example and does not run; it tries to
-- filter on a window function in HAVING (see the remark on its last line).
select
item_category,
sum(amount) as 销售额,
sum(sum(amount)) over(order by sum(amount) desc) as 累积销售额,
sum(sum(amount)) over() as 总销售额,
sum(sum(amount)) over(order by sum(amount) desc)/sum(sum(amount)) over() as 累积销售额百分比
from userbehavior_new
where behavior_type='buy'
group by item_category
having sum(sum(amount)) over(order by sum(amount) desc)/sum(sum(amount)) over()<=0.8;# Error: window functions cannot be used in the HAVING clause

-- Pareto analysis, working form: categories whose cumulative share of
-- purchase sales (in descending sales order) is at most 80%.
-- The window results are computed in a CTE first so the outer query can
-- filter on them with a plain WHERE.
with category_sales as (
    -- Purchase sales per category.
    select
        item_category,
        sum(amount) as 销售额
    from userbehavior_new
    where behavior_type = 'buy'
    group by item_category
),
category_share as (
    -- No frame is given, so the default frame applies: categories with
    -- identical sales are peers and share the same cumulative value.
    select
        item_category,
        销售额,
        sum(销售额) over (order by 销售额 desc) as 累积销售额,
        sum(销售额) over () as 总销售额,
        sum(销售额) over (order by 销售额 desc) / sum(销售额) over () as 累积销售额百分比
    from category_sales
)
select
    item_category,
    销售额,
    累积销售额,
    总销售额,
    累积销售额百分比
from category_share
where 累积销售额百分比 <= 0.8;

-- 4. User value analysis
-- Per user, over purchase rows only: last purchase date, days between that
-- date and the reference date 2014-12-19, number of purchase rows, and
-- total purchase amount.
-- NOTE(review): 2014-12-19 is hard-coded; presumably the day after the last
-- date in the data set - confirm before reusing on other data.
select
    user_id,
    max(日期) as 最近一次消费日期,
    timestampdiff(day, max(日期), '2014-12-19') as 消费时间间隔,
    count(*) as 消费频次,
    sum(amount) as 消费金额
from userbehavior_new
where behavior_type = 'buy'
group by user_id;

-- RFM scoring: raw R/F/M per user plus a 1-5 score for each dimension.
--   R score: 5 when the last purchase is at most 6 days before 2014-12-19,
--            then 4/3/2 for at most 12/18/24 days, otherwise 1.
--   F score: the purchase-row count itself, capped at 5.
--   M score: 1 below 100, 2 below 200, 3 below 300, 4 below 400, otherwise 5.
with user_rfm as (
    -- Raw recency (days), frequency and monetary value per purchasing user.
    select
        user_id,
        timestampdiff(day, max(日期), '2014-12-19') as R,
        count(*) as F,
        sum(amount) as M
    from userbehavior_new
    where behavior_type = 'buy'
    group by user_id
)
select
    user_id,
    R,
    F,
    M,
    case
        when R <= 6  then 5
        when R <= 12 then 4
        when R <= 18 then 3
        when R <= 24 then 2
        else 1
    end as R评分,
    case F
        when 1 then 1
        when 2 then 2
        when 3 then 3
        when 4 then 4
        else 5
    end as F评分,
    case
        when M < 100 then 1
        when M < 200 then 2
        when M < 300 then 3
        when M < 400 then 4
        else 5
    end as M评分
from user_rfm;

-- RFM averages: mean R, F and M score over all purchasing users.
with user_rfm as (
    -- Raw recency (days before 2014-12-19), frequency and monetary value.
    select
        user_id,
        timestampdiff(day, max(日期), '2014-12-19') as R,
        count(*) as F,
        sum(amount) as M
    from userbehavior_new
    where behavior_type = 'buy'
    group by user_id
),
user_scores as (
    -- 1-5 score per dimension, same bands as the RFM scoring query.
    select
        user_id,
        case
            when R <= 6  then 5
            when R <= 12 then 4
            when R <= 18 then 3
            when R <= 24 then 2
            else 1
        end as R评分,
        case F
            when 1 then 1
            when 2 then 2
            when 3 then 3
            when 4 then 4
            else 5
        end as F评分,
        case
            when M < 100 then 1
            when M < 200 then 2
            when M < 300 then 3
            when M < 400 then 4
            else 5
        end as M评分
    from user_rfm
)
select
    avg(R评分) as R均值,
    avg(F评分) as F均值,
    avg(M评分) as M均值
from user_scores;

-- RFM levels: mark each user's R, F and M score as '高' (high) when it is
-- above the average score of all purchasing users, otherwise '低' (low).
-- The averages are computed in-query with avg(...) over () instead of being
-- pasted in as literals (previously 3.5984 / 2.1039 / 2.2051), so the
-- thresholds always match the current data.
with user_rfm as (
    -- Raw recency (days before 2014-12-19), frequency and monetary value.
    select
        user_id,
        timestampdiff(day, max(日期), '2014-12-19') as R,
        count(*) as F,
        sum(amount) as M
    from userbehavior_new
    where behavior_type = 'buy'
    group by user_id
),
user_scores as (
    -- 1-5 score per dimension.
    select
        user_id,
        R,
        F,
        M,
        case
            when R <= 6  then 5
            when R <= 12 then 4
            when R <= 18 then 3
            when R <= 24 then 2
            else 1
        end as R评分,
        case F
            when 1 then 1
            when 2 then 2
            when 3 then 3
            when 4 then 4
            else 5
        end as F评分,
        case
            when M < 100 then 1
            when M < 200 then 2
            when M < 300 then 3
            when M < 400 then 4
            else 5
        end as M评分
    from user_rfm
)
select
    user_id,
    R,
    F,
    M,
    R评分,
    F评分,
    M评分,
    case when R评分 > avg(R评分) over () then '高' else '低' end as R程度,
    case when F评分 > avg(F评分) over () then '高' else '低' end as F程度,
    case when M评分 > avg(M评分) over () then '高' else '低' end as M程度
from user_scores;

-- RFM user value: assign every purchasing user to one of eight segments
-- from the high/low level of the R, F and M scores.
-- The high/low thresholds are the average scores, computed in-query with
-- avg(...) over () instead of pasted literals (previously 3.5984 / 2.1039 /
-- 2.2051), so they always match the current data.
with user_rfm as (
    -- Raw recency (days before 2014-12-19), frequency and monetary value.
    select
        user_id,
        timestampdiff(day, max(日期), '2014-12-19') as R,
        count(*) as F,
        sum(amount) as M
    from userbehavior_new
    where behavior_type = 'buy'
    group by user_id
),
user_scores as (
    -- 1-5 score per dimension.
    select
        user_id,
        R,
        F,
        M,
        case
            when R <= 6  then 5
            when R <= 12 then 4
            when R <= 18 then 3
            when R <= 24 then 2
            else 1
        end as R评分,
        case F
            when 1 then 1
            when 2 then 2
            when 3 then 3
            when 4 then 4
            else 5
        end as F评分,
        case
            when M < 100 then 1
            when M < 200 then 2
            when M < 300 then 3
            when M < 400 then 4
            else 5
        end as M评分
    from user_rfm
),
user_levels as (
    -- High/low flag per dimension, relative to the average score.
    select
        user_id,
        R,
        F,
        M,
        R评分,
        F评分,
        M评分,
        case when R评分 > avg(R评分) over () then '高' else '低' end as R程度,
        case when F评分 > avg(F评分) over () then '高' else '低' end as F程度,
        case when M评分 > avg(M评分) over () then '高' else '低' end as M程度
    from user_scores
)
select
    user_id,
    R,
    F,
    M,
    R评分,
    F评分,
    M评分,
    R程度,
    F程度,
    M程度,
    case
        when R程度 = '高' and F程度 = '高' and M程度 = '高' then '重要价值用户'
        when R程度 = '高' and F程度 = '低' and M程度 = '高' then '重要发展用户'
        when R程度 = '低' and F程度 = '高' and M程度 = '高' then '重要保持用户'
        when R程度 = '低' and F程度 = '低' and M程度 = '高' then '重要挽留用户'
        when R程度 = '高' and F程度 = '高' and M程度 = '低' then '一般价值用户'
        when R程度 = '高' and F程度 = '低' and M程度 = '低' then '一般发展用户'
        when R程度 = '低' and F程度 = '高' and M程度 = '低' then '一般保持用户'
        else '一般挽留用户'
    end as 用户价值分类
from user_levels;

-- Field processing (preview only; this statement does not store anything):
-- remove fully duplicated rows from the raw table, derive the date, hour
-- and weekday of each behavior from `times`, and leave out `user_geohash`,
-- which the later analysis does not use.
-- date_format(times, '%w') returns the day of the week as 0 (Sunday) to 6.
select
    user_id,
    item_id,
    item_category,
    behavior_type,
    date(times) as 日期,
    hour(times) as 小时,
    date_format(times, '%w') as 星期,
    amount
from (
    -- distinct over all raw columns, so only exact duplicate rows collapse
    select distinct * from userbehavior
) as deduped;

-- Save the processed result as the view `userbehavior_new`.
-- The analysis queries earlier in this script read from this view, so it
-- has to exist before they are run.
-- `create or replace` lets the script be re-run without a
-- "table already exists" error.
create or replace view userbehavior_new as
select
    user_id,
    item_id,
    item_category,
    behavior_type,
    date(times) as 日期,
    hour(times) as 小时,
    -- '%w' is the day of the week, 0 (Sunday) to 6
    date_format(times, '%w') as 星期,
    amount
from (
    -- distinct over all raw columns, so only exact duplicate rows collapse
    select distinct * from userbehavior
) as deduped;

-- Quick look at the view's contents.
select * from userbehavior_new;


添加CDA认证专家【维克多阿涛】,微信号:【cdashijiazhuang】,提供数据分析指导及CDA考试秘籍。已助千人通过CDA数字化人才认证。欢迎交流,共同成长!
97.7644 8 0 关注作者 收藏

评论(0)


暂无数据

推荐课程