2021-03-14
阅读量:
6276
AttributeError:'feature_select' object has no attribute 'select_list' 代码不明白为什么报错
def get_kind(x: pd.Series, diff_limit: int = 10): x = x.astype('str') x = x.str.extract(r'(^(\-|)(?=.*\d)\d*(?:\.\d*)?$)')[0] x.dropna(inplace=True) if x.nunique() > diff_limit: kind = 'numeric' else: kind = 'categorical' return kind class feature_select(BaseEstimator, TransformerMixin): def __init__(self, num_list: list = None, cate_list: list = None, num_method: str='sys', cate_method: str='sys', diff_num: int = 10, pos_label: str = 1, show_df: bool = False, ): self.num_list = num_list self.cate_list = cate_list self.num_method= num_method self.cate_method= cate_method self.diff_num = diff_num self.pos_label = pos_label self.show_df = show_df self.selete_list = [] def fit (self, X, y=None): X=X.copy() from scipy import stats if self.num_list is None: self.num_list = [] for col in X.columns: kind = get_kind(x=X[col], diff_limit=self.diff_num) if kind == 'numeric': self.num_list.append(col) if self.cate_list is None: self.cate_list = [] for col in X.columns: kind = get_kind(x=X[col], diff_limit=self.diff_num) if kind == 'categorical': self.cate_list.append(col) X['y']=y yes=X[X['y'] == self.pos_label] yes.reset_index(drop=True,inplace=True) no=X[X['y'] != self.pos_label] no.reset_index(drop=True,inplace=True) del X['y'] sys_cate_list, kf_list, kf_p_list =[],[],[] sys_num_list, t_list, p_value_list, anova_f_list, anova_p_list=[],[],[],[],[] if self.cate_method == 'sys' or self.show_df is True: for obj in self.cate_list: value_list = list(X[obj].unique()) value_sum = 0 for value in value_list: support_yes = (yes[yes[obj] == value].shape[0] +1)/ ( yes.shape[0]+1) support_no = (no[no[obj] == value].shape[0] +1) / (no.shape[0]+1) confidence_yes = support_yes / (support_yes + support_no) value_sum += abs(2 * confidence_yes - 1) * (X[X[obj] == value].shape[0] / X.shape[0]) sys_cate_list.append(value_sum) if value_sum >=0.1: self.select_list.append(obj) if self.cate_method == 'kf' or self.show_df is True: for obj in self.cate_list: df_obj=pd.get_dummies(X[obj],prefix=obj) df_obj['result']=y df_obj=df_obj,groupby('result').sum() obs = df_obj.values kf = stats.chi2_contingency(obs) ''' chi2: The test statistic p: p-value dof: Degrees of freedom expected: The expected frequencies, based on the marginal sums of the table. ''' chi2, p, dof, expect = kf kf_list.append(chi2) kf_p_list.append(p) if p < 0.05: self.select_list.append(obj) if self.num_method == 'sys' or self.show_df is True: for num in self.num_list: mean_c1 = no[num].mean() std_c1 = no[num].std() mean_c2 = yes[num].mean() std_c2 = yes[num].std() value_sum=abs(mean_c1 - mean_c2) / (std_c1 + std_c2) * 2 sys_num_list.append(value_sum) if value_sum >=0.1: self.select_list.append(num) if self.num_method == 't' or self.show_df is True: for num in num_list: t_t, t_p = stats.ttest_ind(yes[num], no[num], equal_var=False, nan_policy='omit') # 'omit'忽略nan值执行计算 t_list.append(t_t) p_value_list.append(t_p) if t_p < 0.05: self.select_list.append(num) if self.num_method == 'anova' or self.show_df is True: for num in num_list: anova_f, anova_p = stats.f_oneway(yes[num], no[num]) anova_f_list.append(anova_f) anova_p_list.append(anova_p) if anova_p < 0.05: self.select_list.append(num) if self.show_df is True: dic1 = {'categorical': self.cate_list, 'importance_': sys_cate_list, 'Kf-Value': kf_list, 'Kf_P-Value': kf_p_list, } df = pd.DataFrame(dic1, columns=['categorical', 'importance_', 'Kf-Value', 'Kf_P-Value']) df.sort_values(by='Kf_P-Value', inplace=True) print(df,'\n') dic2 = {'numeric':self. num_list, 'importance_': sys_num_list, 'T-Value': t_list, 'P-value': p_value_list, 'Anova-F-Value': anova_f_list, 'Anova-P-value': anova_p_list} df = pd.DataFrame(dic2, columns=['numeric', 'importance_', 'T-Value', 'P-value', 'Anova-F-Value', 'Anova-P-value']) df.sort_values(by='Anova-P-value', inplace=True) print(df,'\n') self.select_list= list(set(self.select_list)) print('After select attr:', self.select_list) return self def transform(self,X): X=X.copy() print('attr select success!') return X[self.select_list] fs=feature_select(pos_label=’yes’,show_df=True) x_train=fs.fit_transform(x_train, y_train)
fit的时候报错,提示如下:
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last)in1 fs=feature_select(pos_label='1',show_df=True) ----> 2 x_=fs.fit_transform(x_, y_) C:\ProgramData\Anaconda3\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params) 700 else: 701 # fit method of arity 2 (supervised transformation) --> 702 return self.fit(X, y, **fit_params).transform(X) 703 704in fit(self, X, y) 56 sys_cate_list.append(value_sum) 57 if value_sum >=0.1: ---> 58 self.select_list.append(obj) 59 60 if self.cate_method == 'kf' or self.show_df is True: AttributeError: 'feature_select' object has no attribute 'select_list'






评论(4)

推荐帖子
0条评论
0条评论
0条评论