# Load the Excel sheets, stack them into one frame, inspect it, and clean it.
# NOTE: the file path, sheet name, '' column and 'A'/'B' columns below look
# like placeholders left by the author -- fill in the real values before use.
df1 = pd.read_excel('.xlxs', sheet_name='')  # keyword is `sheet_name` (was `sheetname`)
df2 = ...  # placeholder: load the second sheet the same way as df1
# NOTE(review): df3 is not defined anywhere in this snippet; it must be loaded
# like df1/df2 before this concat can run.
df = pd.concat([df1, df2, df3], axis=0)  # axis=0 stacks rows (author's note: ">>>detail_id")
df.reset_index(drop=True, inplace=True)  # replace the per-sheet indexes with a fresh 0..n-1 index

# Quick inspection of the combined frame.
df.shape
df.info()  # was `df,info()`: a comma typo that built a tuple and called an undefined `info`
df.duplicated().sum()  # count of rows that duplicate an earlier row

# Cleaning.
df.dropna(axis=1, inplace=True)  # axis=1: drop columns (not rows) that contain missing values
df[''].unique()  # distinct values of a column (column name is a placeholder)
df.drop(['A', 'B'], axis=1, inplace=True)  # drop unwanted columns (presumably placeholder names)
df.head()
# Was `.str,strip('[]')`: the comma made this a tuple and called an undefined
# `strip`. Strip leading/trailing '[' and ']' characters from each dish name.
df['dish_names'] = df['dish_names'].str.strip('[]')
# In [33]:
# Traditional <-> Simplified Chinese conversion
from langconv import *
def simple2tradition(line):
    """Convert Simplified Chinese text to Traditional Chinese.

    Passes ``line`` through ``Converter('zh-hant')`` and returns the
    converted result.
    """
    return Converter('zh-hant').convert(line)
def tradition2simple(line):
    """Convert Traditional Chinese text to Simplified Chinese.

    Passes ``line`` through ``Converter('zh-hans')`` and returns the
    converted result.
    """
    return Converter('zh-hans').convert(line)
# Demo: run one sample sentence through each converter, then print both
# results (simplified first, traditional second).
simplified_sentence = tradition2simple('憂郁的臺灣烏龜')
traditional_sentence = simple2tradition('忧郁的台湾乌龟')
print(simplified_sentence)
print(traditional_sentence)
# Output:
# 忧郁的台湾乌龟
# 憂郁的臺灣烏龜
from langconv import *
def tradition2simple(line):
    """Return ``line`` converted from Traditional to Simplified Chinese.

    The conversion is delegated to ``Converter('zh-hans').convert``.
    """
    converter = Converter('zh-hans')
    return converter.convert(line)
# Normalise dish names to Simplified Chinese, element by element.
df['dish_names'] = df['dish_names'].apply(tradition2simple)
# Insert a 'money' column at position 6 holding counts * amounts per row.
df.insert(loc=6, column='money', value=df['counts'] * df['amounts'])
# Distinct-count and popularity statistics.
df['order_id'].nunique()    # number of distinct order ids
df['dish_names'].nunique()  # number of distinct dish names
# Mean of 'amounts' over one row per distinct dish name (first occurrence kept).
df.drop_duplicates(subset=['dish_names'])['amounts'].mean()
df['dish_names'].value_counts()  # number of rows per dish name
# Ten dish names with the largest summed 'amounts'.
df.groupby('dish_names')[['amounts']].sum().nlargest(n=10, columns='amounts')
# Treat detail_id as the user id.
# Top ten detail_ids by summed 'counts', then by summed 'money'.
df.groupby('detail_id')[['counts']].sum().nlargest(n=10, columns='counts')
df.groupby('detail_id')[['money']].sum().nlargest(n=10, columns='money')
# Per-detail_id totals; note this rebinds the names df1 and df2.
df1 = df.groupby('detail_id')[['money']].sum()
df2 = df.groupby('detail_id')[['counts']].sum()
# Total money divided by total counts for each detail_id; show the ten largest.
s = df1['money'] / df2['counts']
s.nlargest(n=10)
# Row counts by hour, day of month, and weekday, each drawn as a bar chart
# with the categories in ascending order.
# NOTE(review): assumes 'place_order_time' is a datetime column -- the `.dt`
# accessor is only available on datetime-like data; confirm after loading.
df['hour'] = df['place_order_time'].dt.hour
df['hour'].value_counts().sort_index().plot.bar()

df['day'] = df['place_order_time'].dt.day
df['day'].value_counts().sort_index().plot.bar()

# `.dt.weekday` numbers Monday as 0, so +1 gives 1 (Monday) .. 7 (Sunday).
df['weekday'] = df['place_order_time'].dt.weekday + 1
df['weekday'].value_counts().sort_index().plot.bar()
·
·