假设检验
![]()
import numpy as np
import pandas as pd
可用于调试代码的内置数据集:导入 sklearn 的 datasets 模块后,可用 dir(datasets) 查看

![]()
# 加载数据(鸢尾花)
from sklearn import datasets
![]()
# List the attribute names of the `datasets` module to see what it exposes.
dir(datasets)
![]()
# Load the iris dataset object. The bare expressions that follow only inspect
# it (presumably for interactive / notebook display); they do not change state.
iris_raw=datasets.load_iris()
iris_raw
iris_raw.target # target holds the class labels; display them
iris_raw.data
![]()
# Convert the raw feature array into a DataFrame (no column names supplied).
pd.DataFrame(data=iris_raw.data)
![]()
# Convert the feature array to a DataFrame again, this time naming the
# columns after the dataset's own feature names.
iris = pd.DataFrame(
    data=iris_raw.data,
    columns=iris_raw.feature_names,
)
iris
# Hand-written labels are an alternative:
# pd.DataFrame(iris_raw.data,columns=['a','b','c','d'])
![]()
# Add the class labels to the DataFrame as a new 'Species' column.
iris['Species']=iris_raw.target # target is originally an array
iris
Z检验(基于正态分布的单样本均值检验)
![]()

# Sample mean of the petal-length column.
iris.loc[:, 'petal length (cm)'].mean()
![]()
# One-sample z-test on petal length.
# H0: the mean iris petal length is 4.2.
import statsmodels.stats.weightstats as sw
sw.ztest(x1=iris['petal length (cm)'], value=4.2)
# Recorded output: (-3.066548320028344, 0.0021654580512200875)
# i.e. (test statistic, p-value).



t检验
![]()
# One-sample t-test on petal length.
# H0: the mean petal length is 4.0.
import scipy.stats as ss
ss.ttest_1samp(a=iris['petal length (cm)'], popmean=4.0)

![]()
# Run the same one-sample t-test and unpack the result into its two numbers:
# the test statistic and the p-value.
stats_val, p_val = ss.ttest_1samp(
    a=iris.loc[:, 'petal length (cm)'],
    popmean=4.0,
)
print(stats_val, p_val)
双样本检验
![]()
# Distinct class labels present in the 'Species' column.
pd.unique(iris['Species'])

![]()
# H0: 山鸢尾(Species==0)和变色鸢尾(Species==1)的花瓣平均长度没有差异
![]()
# Show the petal-length column that the two-sample comparison uses.
iris.loc[:, 'petal length (cm)']
![]()
import scipy.stats as ss

![]()
# Two-sample (independent) t-test on petal length: Species == 0 vs Species == 1.
# H0: both groups have the same mean petal length.
# Rows and column are selected in a single `.loc[mask, column]` step instead of
# chaining two indexing operations (`iris[mask][column]`).
# NOTE(review): labels 0 and 1 are presumably setosa and versicolor -- confirm
# against iris_raw.target_names.
# NOTE(review): ttest_ind pools the variances by default (equal_var=True);
# consider equal_var=False (Welch) if the group variances differ -- confirm.
ss.ttest_ind(
    iris.loc[iris['Species'] == 0, 'petal length (cm)'],
    iris.loc[iris['Species'] == 1, 'petal length (cm)'],
)
# The reported p-value is already two-sided, so it is compared directly with
# alpha = 0.05 (not 0.025). The original run found it below 0.025, hence also
# below 0.05: reject H0.

348.8459
7
0
关注作者
收藏
发表评论
暂无数据


