请问最后一步为什么要group by count1呀?count1不是常数列吗?
关于问题的背景描述和相关数据的下载见另一个帖子
https://www.cda.cn/discuss/post/details/60273784c58002671ea20afd
下载完csv数据后需要先将数据导入到mysql里面再进行查询
import pandas as pd
import numpy as np
# Read the CSV file into a pandas DataFrame.
data1 = pd.read_csv("D:\\A2021_02_13.csv")

# ---------------------------------------------------------------------
# Export the DataFrame to the MySQL database.
# ---------------------------------------------------------------------
from sqlalchemy import create_engine

# Engine that serves as the connection between Python and MySQL.
# For details on create_engine, see help(create_engine).
connection1 = create_engine(
    "mysql+pymysql://root:12345@localhost:3306/test1?charset=utf8"
)

# Write data1 into table biao1 of the test1 database through that engine,
# replacing the table if it already exists.
data1.to_sql(
    name="biao1",
    con=connection1,
    schema="test1",
    if_exists="replace",
)
下面我们用 MySQL 代码来实现查询计算。
涉及到的关键词有:MySQL 创建临时表、删除临时表、两个整数字段如何相除。
-- Step 1: build temp0 with one row per (审核通过日期, 团长ID, 统计日期).
-- Only rows whose 审核通过日期 lies between 2021-01-01 and 2021-01-24
-- (both inclusive) and whose 统计日期 is 0 to 7 days after it are kept.
--   审核通过日期1 : 审核通过日期 parsed into a DATE
--   t             : number of days from 审核通过日期 to 统计日期
--   销量1         : sum of 销量 within the group
--   是否消费       : 1 if any row of the group has 销量 > 0, otherwise 0
DROP TEMPORARY TABLE IF EXISTS temp0;

CREATE TEMPORARY TABLE temp0
SELECT
    团长ID,
    STR_TO_DATE(审核通过日期, '%Y-%m-%d') AS 审核通过日期1,
    DATEDIFF(统计日期, 审核通过日期) AS t,
    SUM(销量) AS 销量1,
    MAX(CASE WHEN 销量 > 0 THEN 1 ELSE 0 END) AS 是否消费
FROM biao1
WHERE STR_TO_DATE(审核通过日期, '%Y-%m-%d') >= STR_TO_DATE('2021-01-01', '%Y-%m-%d')
  AND STR_TO_DATE(审核通过日期, '%Y-%m-%d') <= STR_TO_DATE('2021-01-24', '%Y-%m-%d')
  AND DATEDIFF(biao1.统计日期, biao1.审核通过日期) >= 0
  AND DATEDIFF(biao1.统计日期, biao1.审核通过日期) <= 7
GROUP BY
    biao1.审核通过日期,
    biao1.团长ID,
    biao1.统计日期
-- Ordering is kept from the original statement; later steps aggregate the
-- rows and do not rely on it.
ORDER BY
    审核通过日期,
    团长ID,
    t;
-- Step 2: pivot temp0 so that each 团长ID gets a single row whose columns
-- t0..t7 hold the 是否消费 flag for day offsets 0..7.
-- A column is NULL when temp0 has no row for that offset.
DROP TEMPORARY TABLE IF EXISTS temp1;

CREATE TEMPORARY TABLE temp1
SELECT
    审核通过日期1,
    团长ID,
    MAX(CASE WHEN t = 0 THEN 是否消费 END) AS t0,
    MAX(CASE WHEN t = 1 THEN 是否消费 END) AS t1,
    MAX(CASE WHEN t = 2 THEN 是否消费 END) AS t2,
    MAX(CASE WHEN t = 3 THEN 是否消费 END) AS t3,
    MAX(CASE WHEN t = 4 THEN 是否消费 END) AS t4,
    MAX(CASE WHEN t = 5 THEN 是否消费 END) AS t5,
    MAX(CASE WHEN t = 6 THEN 是否消费 END) AS t6,
    MAX(CASE WHEN t = 7 THEN 是否消费 END) AS t7
FROM temp0
-- 审核通过日期1 is selected without an aggregate, so it must be part of the
-- grouping key: with ONLY_FULL_GROUP_BY enabled MySQL rejects the statement
-- otherwise, and with it disabled the returned date would be arbitrary.
-- NOTE(review): assumes each 团长ID has exactly one 审核通过日期, in which
-- case the result is still one row per 团长ID - confirm against the data.
GROUP BY
    团长ID,
    审核通过日期1;
-- Step 3: add per-row retention flags on top of temp1.
--   count1       : constant 1, summed later to count rows
--   是否次日留存  : 1 when t0 and t1 are both 1, otherwise 0
--   是否7日留存   : 1 when t0..t6 are all 1, otherwise 0
-- If any flag in a sum is NULL the sum is NULL, the comparison is not true,
-- and the result falls through to 0.
-- NOTE(review): t7 is not used here, and the 7-day flag requires activity on
-- every offset 0..6 - confirm this matches the intended retention definition.
DROP TEMPORARY TABLE IF EXISTS temp2;

CREATE TEMPORARY TABLE temp2
SELECT
    temp1.*,
    1 AS count1,
    CASE
        WHEN t0 + t1 = 2 THEN 1
        ELSE 0
    END AS 是否次日留存,
    CASE
        WHEN t0 + t1 + t2 + t3 + t4 + t5 + t6 = 7 THEN 1
        ELSE 0
    END AS 是否7日留存
FROM temp1;
-- Step 4: collapse temp2 into a single summary row.
--   新注册总人数  : number of rows in temp2
--   次日留存人数  : number of rows flagged 是否次日留存
--   7日留存人数   : number of rows flagged 是否7日留存
-- No GROUP BY is needed: count1 is the constant 1 on every row, so grouping
-- by it only ever produced one group. Aggregating without GROUP BY gives the
-- same totals and additionally returns a row of zeros (instead of no row)
-- when temp2 is empty.
DROP TEMPORARY TABLE IF EXISTS temp3;

CREATE TEMPORARY TABLE temp3
SELECT
    COUNT(*) AS 新注册总人数,
    -- SUM over zero rows is NULL; report 0 instead.
    COALESCE(SUM(是否次日留存), 0) AS 次日留存人数,
    -- Backticks keep the digit-leading alias unambiguous for the parser.
    COALESCE(SUM(是否7日留存), 0) AS `7日留存人数`
FROM temp2;
-- Step 5: report the totals from temp3 together with the retention rates.
-- Each rate is the retained count divided by 新注册总人数; the numerator is
-- multiplied by 1.0 first so the division is carried out on a decimal value.
SELECT
    新注册总人数,
    次日留存人数,
    `7日留存人数`,
    (次日留存人数 * 1.0 / 新注册总人数) AS 次日留存率,
    (`7日留存人数` * 1.0 / 新注册总人数) AS `7日留存率`
FROM temp3;