qin393430758

2021-03-10   阅读量: 532

Python

xgb_fill 程序报出问题

class xgb_fill(BaseEstimator, TransformerMixin):
    """Fill missing values column by column using XGBoost models.

    For every column that contains missing values during ``fit`` a model is
    trained on the rows where that column is observed, using all other
    columns as features (categorical columns are one-hot encoded with
    ``pd.get_dummies(..., dummy_na=True)``).  Categorical targets use an
    ``XGBClassifier``; numeric targets use an ``XGBRegressor``.  ``transform``
    then predicts the missing entries with those models.

    Parameters
    ----------
    num_list : list, optional
        Names of the numeric columns.  When ``None`` they are detected in
        ``fit`` via ``get_kind(...) == 'numeric'``.
    cate_list : list, optional
        Names of the categorical columns.  When ``None`` they are detected in
        ``fit`` via ``get_kind(...) == 'categorical'``.
    diff_num : int, default 8
        Forwarded to ``get_kind`` as ``diff_limit``.
    random_state : int, default 0
        Seed passed to every XGBoost model.

    Attributes
    ----------
    xgb_cla_dict : dict
        Fitted classifier per categorical column that had missing values.
    xgb_reg_dict : dict
        Fitted regressor per numeric column that had missing values.
    """

    def __init__(self,
                 num_list: list = None,
                 cate_list: list = None,
                 diff_num: int = 8,
                 random_state: int = 0):
        self.num_list = num_list
        self.cate_list = cate_list
        self.diff_num = diff_num
        self.random_state = random_state
        self.xgb_cla_dict = {}
        self.xgb_reg_dict = {}
        # Feature columns (names and order) each model was trained on; used
        # by transform() to align the one-hot encoded frame before predict.
        self._cla_features = {}
        self._reg_features = {}

    def _columns_of_kind(self, X, kind):
        """Return the columns of ``X`` that ``get_kind`` classifies as ``kind``."""
        return [col for col in X.columns
                if get_kind(x=X[col], diff_limit=self.diff_num) == kind]

    @staticmethod
    def _encode(X, dummy_cols):
        """One-hot encode ``dummy_cols`` of ``X``, adding a NaN indicator per column."""
        return pd.get_dummies(X, columns=dummy_cols, prefix=dummy_cols,
                              dummy_na=True)

    def _training_data(self, X, col, dummy_cols):
        """Return ``(features, target)`` built from the rows where ``col`` is observed."""
        observed = self._encode(X, dummy_cols).dropna(subset=[col])
        return observed.drop([col], axis=1), observed[col]

    def _fill_column(self, X, col, models, features, dummy_cols):
        """Predict the missing entries of ``X[col]`` in place.

        Raises
        ------
        KeyError
            If ``col`` has missing values but no model was trained for it,
            i.e. the column had no missing values during ``fit``.
        """
        missing = X[col].isnull()
        if not missing.any():
            return
        if col not in models:
            raise KeyError(
                "column {!r} has missing values in transform() but had none "
                "during fit(), so no model was trained for it".format(col))
        encoded = self._encode(X, dummy_cols)
        # A boolean mask (rather than dropping by index label) stays correct
        # when the index contains duplicates.
        to_predict = encoded.loc[missing].drop([col], axis=1)
        # The dummy columns depend on the category levels present in *this*
        # frame; align them to what the model saw in fit().  Levels unseen in
        # training are dropped, levels absent here are filled with 0.
        to_predict = to_predict.reindex(columns=features[col], fill_value=0)
        X.loc[missing, col] = models[col].predict(to_predict)

    def fit(self, X, y=None):
        """Train one XGBoost model per column of ``X`` that has missing values.

        Parameters
        ----------
        X : pandas.DataFrame
            Training data.
        y : ignored
            Present for scikit-learn API compatibility.

        Returns
        -------
        self
        """
        from tqdm import tqdm

        if self.num_list is None:
            self.num_list = self._columns_of_kind(X, 'numeric')
        if self.cate_list is None:
            self.cate_list = self._columns_of_kind(X, 'categorical')

        # Start from a clean slate so a refit never keeps stale models.
        self.xgb_cla_dict = {}
        self.xgb_reg_dict = {}
        self._cla_features = {}
        self._reg_features = {}

        for col in tqdm(self.cate_list):
            if not X[col].isnull().any():
                continue
            others = [i for i in self.cate_list if i != col]
            x_, y_ = self._training_data(X, col, others)
            # Setting eval_metric explicitly avoids the XGBoost >= 1.3
            # "default evaluation metric ... was changed" warning.
            metric = 'logloss' if y_.nunique() <= 2 else 'mlogloss'
            # NOTE(review): newer XGBoost releases may require class labels
            # encoded as 0..n_classes-1; labels are passed through unchanged
            # here -- confirm against the installed version.
            xgb_cla = xgb.XGBClassifier(random_state=self.random_state,
                                        eval_metric=metric)
            xgb_cla.fit(x_, y_)
            self.xgb_cla_dict[col] = xgb_cla
            self._cla_features[col] = list(x_.columns)

        for col in tqdm(self.num_list):
            if not X[col].isnull().any():
                continue
            x_, y_ = self._training_data(X, col, self.cate_list)
            xgb_reg = xgb.XGBRegressor(random_state=self.random_state,
                                       objective='reg:squarederror')
            xgb_reg.fit(x_, y_)
            self.xgb_reg_dict[col] = xgb_reg
            self._reg_features[col] = list(x_.columns)

        print('fit xgb fill the Na success!')
        return self

    def transform(self, X):
        """Return a copy of ``X`` with missing values predicted by the fitted models.

        Categorical columns are filled first, then numeric columns; each
        column is predicted from the frame as filled so far.

        Parameters
        ----------
        X : pandas.DataFrame
            Data to impute.  It is not modified.

        Returns
        -------
        pandas.DataFrame

        Raises
        ------
        KeyError
            If a column has missing values here but had none during ``fit``.
        """
        from tqdm import tqdm

        X = X.copy()

        for col in tqdm(self.cate_list):
            others = [i for i in self.cate_list if i != col]
            self._fill_column(X, col, self.xgb_cla_dict,
                              self._cla_features, others)

        for col in tqdm(self.num_list):
            self._fill_column(X, col, self.xgb_reg_dict,
                              self._reg_features, self.cate_list)

        print('transform xgb fill the NA success!')
        return X




[21:55:25] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
[21:55:25] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'multi:softprob' was changed from 'merror' to 'mlogloss'. Explicitly set eval_metric if you'd like to restore the old behavior.


添加CDA认证专家【维克多阿涛】,微信号:【cdashijiazhuang】,提供数据分析指导及CDA考试秘籍。已助千人通过CDA数字化人才认证。欢迎交流,共同成长!
60.0000 1 0 关注作者 收藏

评论(0)


暂无数据

推荐帖子

推荐课程