





# 导入模块
import pandas as pd # 导入数据
from sklearn.model_selection import train_test_split # 数据分割
from sklearn.preprocessing import StandardScaler # 数据标准化
from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge # 正规方程,梯度下降, 岭回归
from sklearn.metrics import mean_squared_error # 均方差
import numpy as np
# Load the Boston house-price table from a CSV in the current working directory.
boston = pd.read_csv("./boston_house_prices.csv")
# Every column except MEDV is used as a feature.
x = boston.drop(columns=["MEDV"])
# MEDV is the target column that the regressors below are trained to predict.
y = boston["MEDV"]

0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 4.98
1 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 9.14
2 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 4.03
3 0.03237 0.0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 2.94
4 0.06905 0.0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90 5.33
... ... ... ... ... ... ... ... ... ... ... ... ... ...
501 0.06263 0.0 11.93 0 0.573 6.593 69.1 2.4786 1 273 21.0 391.99 9.67
502 0.04527 0.0 11.93 0 0.573 6.120 76.7 2.2875 1 273 21.0 396.90 9.08
503 0.06076 0.0 11.93 0 0.573 6.976 91.0 2.1675 1 273 21.0 396.90 5.64
504 0.10959 0.0 11.93 0 0.573 6.794 89.3 2.3889 1 273 21.0 393.45 6.48
505 0.04741 0.0 11.93 0 0.573 6.030 80.8 2.5050 1 273 21.0 396.90 7.88

506 rows × 13 columns

# StandardScaler only accepts 2-D input, so turn the target into a single-column array.
y = np.array(y).reshape((-1, 1))
# Hold out 25% of the rows as the test set (the split is random on every run).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)
# Standardize the features: the scaler is fitted on the training rows only
# and the same fitted scaler is then applied to the test rows.
std_x = StandardScaler()
x_train = std_x.fit_transform(x_train)
x_test = std_x.transform(x_test)
# Standardize the target the same way (original author's rationale: keep the
# target on the same scale as the standardized features). Predictions are
# mapped back to the original units later with std_y.inverse_transform.
std_y = StandardScaler()
y_train = std_y.fit_transform(y_train)
y_test = std_y.transform(y_test)
# Instantiate the ordinary least-squares linear regression model.
lr = LinearRegression()
# Fit the model on the training set (not the test set). Without this call the
# later lr.predict(x_test) would be invoked on an unfitted estimator and fail.
lr.fit(x_train, y_train)

# Inspect the linear-regression coefficients (presumably via lr.coef_); the array below is the recorded output.
array([[-0.11432612,  0.12922939,  0.05168773,  0.0306429 , -0.27800333,
0.26465189, 0.02894241, -0.34962992, 0.31569604, -0.24717234,
-0.26784233, 0.11032066, -0.41354896]])
# Predict the (standardized) target for the test set with the linear model,
# then undo the target standardization so the predictions are in original units.
lr_scaled_predict = lr.predict(x_test)
y_lr_predict = std_y.inverse_transform(lr_scaled_predict)
[23.8507796 ],
[34.6094617 ],
[37.4279415 ],
[32.1620506 ],
[32.6601561 ],
[32.2023306 ],
[25.5830554 ],
[ 4.3558633 ],
[ 8.26805278],
[ 6.3965518 ],
[29.4266973 ],
[23.4489951 ],
[11.0440392 ],
[19.4491492 ],
[ 7.45548609],
[ 3.44149312],
[31.1447006 ],
# Mean squared error (loss) of the linear-regression predictions, measured in
# the original target units by un-standardizing y_test first.
loss_lr = mean_squared_error(
    y_true=std_y.inverse_transform(y_test),
    y_pred=y_lr_predict,
)
# Instantiate the stochastic-gradient-descent regressor.
sgd = SGDRegressor()
# SGDRegressor expects a 1-D target of shape (n_samples,). y_train is an
# (n_samples, 1) column vector here, which triggers a DataConversionWarning,
# so flatten it before fitting.
sgd.fit(x_train, y_train.ravel())
D:\DeveloperTools\Anaconda\lib\site-packages\sklearn\utils\validation.py:1143: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
y = column_or_1d(y, warn=True)
y = column_or_1d(y, warn=True)

# Inspect the SGD-regression coefficients (presumably via sgd.coef_); the array below is the recorded output.
array([-0.09761234,  0.08895746, -0.02421963,  0.02879482, -0.17976106,
0.30861884, -0.00250273, -0.27224473, 0.12435245, -0.0780263 ,
-0.24480836, 0.12012805, -0.38888841])
# Predict the (standardized) target for the test set with the SGD model.
# The prediction is reshaped into a single column because the target scaler
# works on 2-D input; inverse_transform then restores the original units.
sgd_scaled_predict = sgd.predict(x_test).reshape((-1, 1))
y_sgd_predict = std_y.inverse_transform(sgd_scaled_predict)
[28.7775393 ],
[21.7295418 ],
[24.5110437 ],
[31.7400822 ],
[17.1185942 ],
[ 6.00742562],
[35.3744289 ],
[ 9.09787342],
[ 5.43064498],
[23.792957 ],
[24.6814747 ],
[ 7.63364889],
[20.4696819 ],
[ 2.27690801],
[23.690842 ],
[20.5993433 ],
[ 8.4159419 ],
# Mean squared error (loss) of the SGD-regression predictions, measured in
# the original target units by un-standardizing y_test first.
loss_sgd = mean_squared_error(
    y_true=std_y.inverse_transform(y_test),
    y_pred=y_sgd_predict,
)
# Instantiate ridge regression; alpha is the regularization strength.
rd = Ridge(alpha=1.0)
# Fit the model on the training set. Without this call the later
# rd.predict(x_test) would be invoked on an unfitted estimator and fail.
rd.fit(x_train, y_train)

# Inspect the ridge-regression coefficients (presumably via rd.coef_); the array below is the recorded output.
array([[-0.11307323,  0.12670886,  0.0472335 ,  0.03097279, -0.27277927,
0.26649452, 0.02738887, -0.34543899, 0.30352311, -0.23553989,
-0.26624461, 0.11041044, -0.4112231 ]])
# Predict the (standardized) target for the test set with the ridge model,
# then undo the target standardization so the predictions are in original units.
rd_scaled_predict = rd.predict(x_test)
y_rd_predict = std_y.inverse_transform(rd_scaled_predict)
[14.3927178 ],
[23.3211182 ],
[30.6343198 ],
[24.3590396 ],
[ 4.52270441],
[14.1986027 ],
[ 8.34275415],
[23.6132958 ],
[ 6.38923846],
[11.0745397 ],
[ 7.51316118],
[ 3.43124359],
[34.4813584 ],
[18.8861402 ],
[19.4947593 ],
# Mean squared error (loss) of the ridge-regression predictions, measured in
# the original target units by un-standardizing y_test first.
loss_rd = mean_squared_error(
    y_true=std_y.inverse_transform(y_test),
    y_pred=y_rd_predict,
)


