fit的时候报错了,报错原因为
ValueError: The label must consist of integer labels of form 0, 1, 2, ..., [num_class - 1].请问怎么解决?代码.docx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import xgboost as xgb
df1 = pd.read_excel('d:/python/data/data.xlsx')
x_ = df1.drop(['Target'], axis=1)
y_ = df1['Target'].values
def get_kind(x: pd.Series, diff_limit: int = 10):
x = x.astype('str')
x = x.str.extract(r'(^(\-|)(?=.*\d)\d*(?:\.\d*)?$)')[0]
x.dropna(inplace=True)
if x.nunique() > diff_limit:
kind = 'numeric'
else:
kind = 'categorical'
return kind
class xgb_fill(BaseEstimator, TransformerMixin):
def __init__(self,
num_list: list = None,
cate_list: list = None,
diff_num: int = 8,
random_state: int = 0):
self.num_list = num_list
self.cate_list = cate_list
self.diff_num = diff_num
self.random_state = random_state
self.xgb_cla_dict = {}
self.xgb_reg_dict = {}
def fit(self, X, y=None):
from tqdm import tqdm
X = X.copy()
if self.num_list is None:
self.num_list = []
for col in X.columns:
kind = get_kind(x=X[col], diff_limit=self.diff_num)
if kind == 'numeric':
self.num_list.append(col)
if self.cate_list is None:
self.cate_list = []
for col in X.columns:
kind = get_kind(x=X[col], diff_limit=self.diff_num)
if kind == 'categorical':
self.cate_list.append(col)
for col in tqdm(self.cate_list):
file = X.copy()
if file[col].isnull().any():
df = pd.get_dummies(file, columns=[i for i in self.cate_list if i != col],
prefix=[i for i in self.cate_list if i != col],
dummy_na=True)
not_null = df.dropna(subset=[col])
x_ = not_null.drop([col], axis=1)
y_ = not_null[col]
xgb_cla = xgb.XGBClassifier(random_state=self.random_state,use_label_encoder=False)
xgb_cla.fit(x_, y_)
self.xgb_cla_dict[col] = xgb_cla
for col in tqdm(self.num_list):
file = X.copy()
if file[col].isnull().any():
df = pd.get_dummies(file, columns=self.cate_list, dummy_na=True, prefix=self.cate_list)
not_null = df.dropna(subset=[col])
x_ = not_null.drop([col], axis=1)
y_ = not_null[col]
xgb_reg = xgb.XGBRegressor(random_state=self.random_state, objective='reg:squarederror')
xgb_reg.fit(x_, y_)
self.xgb_reg_dict[col] = xgb_reg
print('fit xgb fill the Na success!')
return self
def transform(self, X):
X = X.copy()
from tqdm import tqdm
for col in tqdm(self.cate_list):
file = X.copy()
if file[col].isnull().any():
df = pd.get_dummies(file, columns=[i for i in self.cate_list if i != col],
prefix=[i for i in self.cate_list if i != col],
dummy_na=True)
not_null = df.dropna(subset=[col])
null = df.drop(not_null.index)
null[col] = self.xgb_cla_dict[col].predict(null.drop([col], axis=1))
X[col] = pd.concat([null, not_null], axis=0)[col]
else:
X[col] = file[col]
for col in tqdm(self.num_list):
file = X.copy()
if file[col].isnull().any():
df = pd.get_dummies(file, columns=self.cate_list, dummy_na=True, prefix=self.cate_list)
not_null = df.dropna(subset=[col])
null = df.drop(not_null.index)
null[col] = self.xgb_reg_dict[col].predict(null.drop([col], axis=1))
X[col] = pd.concat([null, not_null], axis=0)[col]
else:
X[col] = file[col]
print('transform xgb fill the NA success!')
return X
xgbf = xgb_fill()
x_ = xgbf.fit_transform(x_) 运行到这步就报错了