# Exploratory analysis of a product-sales table loaded from CSV.
# Column names are Chinese and are used verbatim as keys; rough glosses:
#   '宝贝' item, '卖家' seller, '位置' location,
#   '价格' price, '成交量' sales volume, '成交额' turnover (derived below).
# Bare expressions are kept on purpose: in a notebook they display their value.
import pandas as pd  # harmless if pandas is already imported earlier in the file

# NOTE(review): '.csv' looks like a placeholder path -- confirm the real file name.
df = pd.read_csv('.csv')

# --- Basic overview -------------------------------------------------------
df.shape
df.info()
# include='all' makes describe() summarise columns of every dtype (the result
# is the union of the per-type attributes); `include` / `exclude` can be used
# to limit which columns are analysed.
df.describe(include='all')

# --- Duplicates -----------------------------------------------------------
df.duplicated().sum()  # number of fully duplicated rows
# Boolean mask of duplicated rows. The original called the non-existent
# df.duplicate(); NOTE(review): use df.drop_duplicates() if removal was meant.
df.duplicated()

# --- Distinct sellers and locations ---------------------------------------
df['卖家'].unique()
df['卖家'].nunique()
df['位置'].unique()
df['位置'].nunique()

# --- Ten highest prices ---------------------------------------------------
# nlargest is a Series/DataFrame method; there is no module-level pd.nlargest.
df['价格'].nlargest(10)
df.nlargest(10, columns='价格')

# --- Derived column: turnover = price * volume ----------------------------
# Insert the new column immediately after '成交量'. DataFrame.insert modifies
# df in place and raises ValueError if '成交额' already exists (e.g. when the
# cell is re-run).
turnover_pos = df.columns.get_loc('成交量') + 1
df.insert(turnover_pos, '成交额', df['价格'] * df['成交量'])
df

# --- Top items by turnover ------------------------------------------------
df[['宝贝', '成交额']].nlargest(10, '成交额')
# Same ranking after summing turnover per item. pivot_table (not pivot) is the
# method that accepts aggfunc.
(
    df[['宝贝', '成交额']]
    .pivot_table(index='宝贝', aggfunc='sum')
    .nlargest(10, columns='成交额')
)
df1 = df.nlargest(10, '成交额')
df1[['成交额', '宝贝']]

# --- Top items within the '江苏' location ---------------------------------
df1 = df[df['位置'] == '江苏']
# NOTE(review): the original used index='位置', which is not among the selected
# columns and is constant after the filter above; grouping by item is assumed
# to be the intent -- confirm.
(
    df1[['宝贝', '成交额']]
    .pivot_table(index='宝贝', aggfunc='sum')
    .nlargest(10, columns='成交额')
)

# --- Sellers --------------------------------------------------------------
# na=False treats missing seller names as "no match" so the mask stays boolean.
df[df['卖家'].str.contains('旗舰店', na=False)]

# Ten sellers with the highest total turnover, largest first.
df2 = (
    df[['成交额', '卖家']]
    .pivot_table(index='卖家', aggfunc='sum')
    .nlargest(10, columns='成交额')
)
df2[0:10]

# Running total and cumulative share. The share is relative to the total of
# the rows in df2 (the top ten), not to all sellers.
df2['累计成交额'] = df2['成交额'].cumsum()
df2['累计占比'] = df2['累计成交额'] / df2['成交额'].sum()
df2

# Re-index the cumulative share by rank (1..n) instead of seller name for the
# x-axis of the line chart.
s = df2['累计占比'].reset_index(drop=True)
s.index = s.index + 1
s.plot(kind='line')
df2[0:10].plot(kind='line')