随机森林 n_estimators 学习曲线




# 导入包
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
# Load the wine dataset.
wine = load_wine()

# Hold out 30% of the samples as the test set; the rest is used for training.
x_train, x_test, y_train, y_test = train_test_split(
    wine.data,
    wine.target,
    test_size=0.3,
)

# Build a decision tree and a random forest (both with random_state=0) and
# train each on the training split. fit() returns the estimator itself, so
# construction and training are chained.
clf = DecisionTreeClassifier(random_state=0).fit(x_train, y_train)
rfc = RandomForestClassifier(random_state=0).fit(x_train, y_train)

# Score both fitted models on the held-out test set and print the results.
clf_score, rfc_score = (model.score(x_test, y_test) for model in (clf, rfc))
print("sinle tree: {0}\nrandom tree: {1}".format(clf_score, rfc_score))
sinle tree: 0.9074074074074074
random tree: 0.9629629629629629


# 导入交叉验证和画图工具
%matplotlib inline
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
# Compare a single decision tree with a random forest made of 25 trees.
clf = DecisionTreeClassifier()
rfc = RandomForestClassifier(n_estimators=25)

# Run 10-fold cross-validation on the full dataset for each model, tree first
# and forest second; each result holds one score per fold.
clf_corss, rfc_corss = (
    cross_val_score(model, wine.data, wine.target, cv=10)
    for model in (clf, rfc)
)

# Print the mean score over the 10 folds for each model.
print(
    "single tree mean socre: {}\nrandom tree mean socre {}".format(
        clf_corss.mean(),
        rfc_corss.mean(),
    )
)
single tree mean socre: 0.8705882352941178
random tree mean socre 0.9722222222222221
# Plot the per-fold cross-validation scores of the decision tree and the
# random forest, with the fold number (1-10) on the x-axis.
plt.plot(range(1, 11), clf_corss, label="single tree")
plt.plot(range(1, 11), rfc_corss, label="random tree")
plt.xticks(range(1, 11))
# The labels passed to plot() are only drawn once legend() is called.
plt.legend()
<matplotlib.legend.Legend at 0x7ff6f4815d50>

# Per-fold scores of the single decision tree under 10-fold cross-validation.
clf_corss = cross_val_score(
    clf,
    wine.data,
    wine.target,
    cv=10,
)
array([0.88888889, 0.88888889, 0.72222222, 0.88888889, 0.83333333,
0.83333333, 1. , 0.94444444, 0.94117647, 0.76470588])
# Per-fold scores of the random forest under 10-fold cross-validation.
rfc_corss = cross_val_score(
    rfc,
    wine.data,
    wine.target,
    cv=10,
)
array([1.        , 1.        , 0.94444444, 0.94444444, 0.88888889,
1. , 1. , 1. , 1. , 1. ])


# Repeat the 10-fold cross-validation 10 times with freshly built models and
# collect the mean score of every run, one list per model.
clf_list = []
rfc_list = []
for i in range(10):
    clf = DecisionTreeClassifier()
    rfc = RandomForestClassifier(n_estimators=25)
    clf_corss_mean = cross_val_score(clf, wine.data, wine.target, cv=10).mean()
    rfc_corss_mean = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    # Record this run's means; without these appends the lists stay empty and
    # the plot below fails because x has 10 points and y has none.
    clf_list.append(clf_corss_mean)
    rfc_list.append(rfc_corss_mean)

# Plot the mean score of each run for both models (run number on the x-axis).
plt.plot(range(1, 11), clf_list, label="single tree")
plt.plot(range(1, 11), rfc_list, label="random tree")
plt.xticks(range(1, 11))
# The labels passed to plot() are only drawn once legend() is called.
plt.legend()
<matplotlib.legend.Legend at 0x7ff6f490f670>

n_estimators 学习曲线

# Learning curve for n_estimators: mean 10-fold cross-validation score of a
# random forest built with 1, 2, ..., 200 trees.
superpa = []
for i in range(200):
    # range(200) yields 0..199, so the forest size is i + 1.
    rfc = RandomForestClassifier(n_estimators=i + 1, n_jobs=-1)
    rfc_cross = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    # Store the score; without this append superpa stays empty and max()
    # below raises ValueError.
    superpa.append(rfc_cross)

# Print the best mean score and its 0-based position in superpa; the
# corresponding n_estimators value is that index + 1.
best_score = max(superpa)
print(best_score, superpa.index(best_score))

# Plot the mean score against the number of trees (1-200).
plt.plot(range(1, 201), superpa, label="rfc_cross_mean")
# The label passed to plot() is only drawn once legend() is called.
plt.legend()
0.9888888888888889 20

<matplotlib.legend.Legend at 0x7ff6f540f100>

随机森林 n_estimators 学习曲线教程到此结束。

