loan=pd.read_csv(r"C:\Users\86153\Desktop\python考试题\12月考试\data\small_loan.csv",encoding="utf-8")

loan.head(10)

loan.drop("id",inplace=True,axis=1)

x_=loan.drop(["response"],axis=1)

y_=loan["response"].values

#区分类别

def get_kind(x:pd.Series,diff_limit:int=10):

x=x.astype(str)

x=x.str.extract(r"(^(\-|)(?=.*\d)\d*(?:\.\d*)?$")[0]

x.dropna(inplace=True)

if x.nunique()>diff_limit:

kind="numeric"

else:

kind="categorical"

return kind

#处理错误值

class wrong_value_fillna(BaseEstimator,TransformerMixin):

def __init__(self,

num_list:list=None,

cate_list:list=None,

wrong_value:list=None,

diff_num:int=10):

self.num_list=num_list

self.cate_list=cate_list

self.diff_num=diff_num

self.wrong_value=wrong_value

def fit(self,X,y=None):

X=X.copy()

if self.num_list is None:

self.num_list=[]

for col in X.columns:

kind=get_kind(x=X[col],diff_limit=self.diff_num)

if kind=="numeric":

self.num_list.append(col)

if self.cate_list is None:

self.cate_list=[]

for col in X.columns:

kind=get_kind(x=X[col],diff_limit=self.diff_num)

if kind=="categorical":

self.cate_list.append(col)

return(self)

def transform(self,X,y=None):

X=X.copy()

X.replace(self.wrong_value,np.nan,inplace=True)

for col in X.columns:

if get_kind(X[col])=="numeric":

X[col]=X[col].astype("float")

else:

X[col]=X[col].astype("object")

return X

wvf=wrong_value_fillna(wrong_value=[".","?"])

x_=wvf.fit_transform(x_,y_)


老师,前面的代码是这样的,运行都没有报错,运行到最后一步时报missing)错误

0 0 1