随机森林n_estimators 学习曲线

2023-05-06,,

随机森林

单颗树与随机森林的的分对比

# 导入包
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
# 实例化红酒数据集
wine = load_wine()
# 划分测试集和训练集
x_train, x_test, y_train, y_test = train_test_split(wine.data, wine.target, test_size=0.3)
# 实例化决策树和随机森林,random_state=0
clf = DecisionTreeClassifier(random_state=0)
rfc = RandomForestClassifier(random_state=0)
# 训练模型
clf.fit(x_train, y_train)
rfc.fit(x_train, y_train)

#sk-container-id-1 { color: rgba(0, 0, 0, 1); background-color: rgba(255, 255, 255, 1) }
#sk-container-id-1 pre { padding: 0 }
#sk-container-id-1 div.sk-toggleable { background-color: rgba(255, 255, 255, 1) }
#sk-container-id-1 label.sk-toggleable__label { cursor: pointer; display: block; width: 100%; margin-bottom: 0; padding: 0.3em; box-sizing: border-box; text-align: center }
#sk-container-id-1 label.sk-toggleable__label-arrow:before { content: "▸"; float: left; margin-right: 0.25em; color: rgba(105, 105, 105, 1) }
#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before { color: rgba(0, 0, 0, 1) }
#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before { color: rgba(0, 0, 0, 1) }
#sk-container-id-1 div.sk-toggleable__content { max-height: 0; max-width: 0; overflow: hidden; text-align: left; background-color: rgba(240, 248, 255, 1) }
#sk-container-id-1 div.sk-toggleable__content pre { margin: 0.2em; color: rgba(0, 0, 0, 1); border-radius: 0.25em; background-color: rgba(240, 248, 255, 1) }
#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content { max-height: 200px; max-width: 100%; overflow: auto }
#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before { content: "▾" }
#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label { background-color: rgba(212, 235, 255, 1) }
#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label { background-color: rgba(212, 235, 255, 1) }
#sk-container-id-1 input.sk-hidden--visually { border: 0; clip: rect(1px, 1px, 1px, 1px); height: 1px; margin: -1px; overflow: hidden; padding: 0; position: absolute; width: 1px }
#sk-container-id-1 div.sk-estimator { font-family: monospace; background-color: rgba(240, 248, 255, 1); border: 1px dotted rgba(0, 0, 0, 1); border-radius: 0.25em; box-sizing: border-box; margin-bottom: 0.5em }
#sk-container-id-1 div.sk-estimator:hover { background-color: rgba(212, 235, 255, 1) }
#sk-container-id-1 div.sk-parallel-item::after { content: ""; width: 100%; border-bottom: 1px solid rgba(128, 128, 128, 1); flex-grow: 1 }
#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label { background-color: rgba(212, 235, 255, 1) }
#sk-container-id-1 div.sk-serial::before { content: ""; position: absolute; border-left: 1px solid rgba(128, 128, 128, 1); box-sizing: border-box; top: 0; bottom: 0; left: 50%; z-index: 0 }
#sk-container-id-1 div.sk-serial { display: flex; flex-direction: column; align-items: center; background-color: rgba(255, 255, 255, 1); padding-right: 0.2em; padding-left: 0.2em; position: relative }
#sk-container-id-1 div.sk-item { position: relative; z-index: 1 }
#sk-container-id-1 div.sk-parallel { display: flex; align-items: stretch; justify-content: center; background-color: rgba(255, 255, 255, 1); position: relative }
#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before { content: ""; position: absolute; border-left: 1px solid rgba(128, 128, 128, 1); box-sizing: border-box; top: 0; bottom: 0; left: 50%; z-index: -1 }
#sk-container-id-1 div.sk-parallel-item { display: flex; flex-direction: column; z-index: 1; position: relative; background-color: rgba(255, 255, 255, 1) }
#sk-container-id-1 div.sk-parallel-item:first-child::after { align-self: flex-end; width: 50% }
#sk-container-id-1 div.sk-parallel-item:last-child::after { align-self: flex-start; width: 50% }
#sk-container-id-1 div.sk-parallel-item:only-child::after { width: 0 }
#sk-container-id-1 div.sk-dashed-wrapped { border: 1px dashed rgba(128, 128, 128, 1); margin: 0 0.4em 0.5em; box-sizing: border-box; padding-bottom: 0.4em; background-color: rgba(255, 255, 255, 1) }
#sk-container-id-1 div.sk-label label { font-family: monospace; font-weight: bold; display: inline-block; line-height: 1.2em }
#sk-container-id-1 div.sk-label-container { text-align: center }
#sk-container-id-1 div.sk-container { display: inline-block !important; position: relative }
#sk-container-id-1 div.sk-text-repr-fallback { display: none }

RandomForestClassifier(random_state=0)

In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

RandomForestClassifier

RandomForestClassifier(random_state=0)
# 返回测试集的分
clf_score = clf.score(x_test, y_test)
rfc_score = rfc.score(x_test, y_test)
print("sinle tree: {0}\nrandom tree: {1}".format(clf_score, rfc_score))
sinle tree: 0.9074074074074074
random tree: 0.9629629629629629

单颗树与随机森林在交叉验证下的对比图

# 导入交叉验证和画图工具
%matplotlib inline
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
# 实例化决策树和随机森林
clf = DecisionTreeClassifier()
rfc = RandomForestClassifier(n_estimators=25) #创建25棵树组成的随机森林
# 实例化交叉验证 10次
clf_corss = cross_val_score(clf, wine.data, wine.target, cv=10)
rfc_corss = cross_val_score(rfc, wine.data, wine.target, cv=10)
# 查看决策树和随机森林的最好结果
print("single tree mean socre: {}\nrandom tree mean socre {}".format(clf_corss.mean(), rfc_corss.mean()))
single tree mean socre: 0.8705882352941178
random tree mean socre 0.9722222222222221
# 画出决策树和随机森林对比图
plt.plot(range(1, 11), clf_corss, label="single tree")
plt.plot(range(1, 11), rfc_corss, label="random tree")
plt.xticks(range(1, 11))
plt.legend()
<matplotlib.legend.Legend at 0x7ff6f4815d50>

clf_corss = cross_val_score(clf, wine.data, wine.target, cv=10)
clf_corss
array([0.88888889, 0.88888889, 0.72222222, 0.88888889, 0.83333333,
0.83333333, 1. , 0.94444444, 0.94117647, 0.76470588])
rfc_corss = cross_val_score(rfc, wine.data, wine.target, cv=10)
rfc_corss
array([1.        , 1.        , 0.94444444, 0.94444444, 0.88888889,
1. , 1. , 1. , 1. , 1. ])

十次交叉验证下决策树和随机森林的对比

# 创建分数列表
clf_list = []
rfc_list = []
for i in range(10):
clf = DecisionTreeClassifier()
rfc = RandomForestClassifier(n_estimators=25)
clf_corss_mean = cross_val_score(clf, wine.data, wine.target, cv=10).mean()
rfc_corss_mean = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
clf_list.append(clf_corss_mean)
rfc_list.append(rfc_corss_mean)
# 画出决策树和随机森林对比图
plt.plot(range(1, 11), clf_list, label="single tree")
plt.plot(range(1, 11), rfc_list, label="random tree")
plt.xticks(range(1, 11))
plt.legend()
<matplotlib.legend.Legend at 0x7ff6f490f670>

n_estimators 学习曲线

# 1-200颗树的学习曲线
superpa = []
for i in range(200):
rfc = RandomForestClassifier(n_estimators=i+1, n_jobs=-1)
rfc_cross = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
superpa.append(rfc_cross)
print(max(superpa), superpa.index(max(superpa)))
plt.figure(figsize=(20,8))
plt.plot(range(1,201), superpa, label="rfc_cross_mean")
plt.legend()
0.9888888888888889 20

<matplotlib.legend.Legend at 0x7ff6f540f100>

随机森林n_estimators 学习曲线的相关教程结束。

《随机森林n_estimators 学习曲线.doc》

下载本文的Word格式文档,以方便收藏与打印。