我成功地在管道(Pipeline)中使用 SMOTEENN 和随机森林(RF)实现了一个模型,代码如下:
import random
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import roc_curve, roc_auc_score, confusion_matrix
from imblearn.over_sampling import SMOTE
from imblearn.combine import SMOTEENN
from imblearn.pipeline import Pipeline
有了 X_train、X_test、y_train 和 y_test 矩阵之后,我成功地执行了 sklearn 的 RandomizedSearchCV,如下所示:
# Build a SMOTEENN + RandomForest pipeline and tune the forest with
# scikit-learn's RandomizedSearchCV, scored by ROC AUC over 8 CV folds.

# `cpu_count` is used for `n_jobs` below but was never imported.
from multiprocessing import cpu_count

seed = 1706
knn = 10

# SMOTE over-sampler that SMOTEENN uses for its over-sampling stage.
smoted = SMOTE(
    sampling_strategy='auto',
    k_neighbors=knn,
    random_state=seed,
)

# NOTE(review): `datapath` is not defined in the visible snippet, and `mydata`
# is not used below -- presumably the train/test split is derived from it.
mydata = pd.read_csv(datapath)

# Search space; the `rf__` prefix routes each parameter to the 'rf' step.
params_rf = {
    'rf__max_depth': [8, 14, 20, 26],
    'rf__min_samples_leaf': [8, 15, 22, 29],
    'rf__max_features': [6, 12, 18, 24, 30],
    'rf__n_estimators': [400, 800],
}

smote_enn = SMOTEENN(smote=smoted)
# Seed the forest so repeated runs build the same trees.
rf = RandomForestClassifier(criterion='gini', random_state=seed)
# imblearn's Pipeline is required here: it accepts sampler steps.
pipeline = Pipeline([('smote_enn', smote_enn), ('rf', rf)])

# Kept for backward compatibility. Seeding Python's `random` module does NOT
# affect scikit-learn, which draws from NumPy-based `random_state` objects;
# reproducibility comes from the explicit `random_state=seed` arguments.
random.seed(seed)

grid_rf = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=params_rf,
    scoring='roc_auc',
    cv=8,
    # Leave two cores free, but never drop below one worker: a value of 0 is
    # rejected and -1 would mean "use every core".
    n_jobs=max(1, cpu_count() - 2),
    refit=True,
    return_train_score=False,
    n_iter=80,
    # Makes the sampled parameter combinations reproducible.
    random_state=seed,
)
# `.values.ravel()` flattens the target to the 1-D array the estimators expect.
grid_rf.fit(X_train, y_train.values.ravel())
# Same randomized search as the scikit-learn version, but driven by dask-ml's
# RandomizedSearchCV. No `n_jobs` is passed here (the original author's note:
# not needed because of dask).
from dask_ml.model_selection import RandomizedSearchCV as DaskRandomGridSearchCV

grid_rf = DaskRandomGridSearchCV(
    estimator=pipeline,
    param_distributions=params_rf,
    n_iter=80,
    cv=8,
    scoring='roc_auc',
    refit=True,
    return_train_score=False,
)
# NOTE(review): this is the call reported to fail with
# "AttributeError: 'SMOTEENN' object has no attribute 'transform'".
# Presumably dask-ml fits pipeline steps itself and expects every intermediate
# step to expose `transform`, whereas imblearn samplers only expose
# `fit_resample` -- confirm against the dask-ml documentation.
grid_rf.fit(X_train, y_train.values.ravel())
但是,运行上面使用 dask 的版本时,我得到了以下错误:
AttributeError: 'SMOTEENN' object has no attribute 'transform'
为什么它与 sklearn 的 RandomizedSearchCV 一起可以正常工作,而与 dask 的 RandomizedSearchCV 一起却不行?
我已将所有库升级到最新版本。我使用的是 Python 3.6.9(我还在另一台运行 Python 3.7.3 的机器上进行了尝试,得到了相同的错误)。