Andrew Ng Machine Learning Homework: Bias and Variance
1. Preface
The homework assignments and data for Andrew Ng's Machine Learning course can be downloaded from the Coursera platform; you only need to register an account and add the course. So the problem statements and data are not reproduced here; anyone who needs them can download them directly.

Assignment and data download: Andrew Ng's Machine Learning course on Coursera.
2. Bias and Variance
The bias and variance assignment covers quite a lot of ground. Its main parts, which the code below walks through in order, are:

- fit regularized linear regression to the dam-water data and plot the fitted line;
- plot learning curves for the linear model to diagnose high bias;
- map the single feature to polynomial terms, normalize them, and fit polynomial regression;
- plot learning curves for the polynomial model to diagnose high variance;
- plot the training, cross-validation, and test costs as the regularization parameter lamda varies, in order to pick a good lamda.
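For reference, the regularized cost function and gradient that compute_costs and compute_gradient in the code below implement are the standard ones from this exercise:

$$ J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\bigl(h_\theta(x^{(i)}) - y^{(i)}\bigr)^2 + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2 $$

$$ \frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\bigl(h_\theta(x^{(i)}) - y^{(i)}\bigr)x_j^{(i)} + \frac{\lambda}{m}\theta_j \quad (j \geq 1) $$

with $\theta_0$ left out of the regularization term, which is why the code zeroes the first entry of the regularization vector.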
The full code is attached below with detailed comments, so I will not explain it line by line here.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd  # imported in the original script, though not actually used below
import scipy.io as scio  # used to load .mat files
from scipy.optimize import minimize


# Load the data
def input_data():
    dataFile = 'machine-learning-ex5\\machine-learning-ex5\\ex5\\ex5data1.mat'
    # load the .mat file
    data = scio.loadmat(dataFile)
    # training set
    train_X = data['X']
    train_y = data['y']
    # test set
    test_X = data['Xtest']
    test_y = data['ytest']
    # cross-validation set
    val_X = data['Xval']
    val_y = data['yval']
    return train_X, train_y, test_X, test_y, val_X, val_y


# Visualize the data
def visualize_data(X, y):
    fig, ax = plt.subplots(1, 1)
    ax.scatter(X, y)  # scatter plot
    ax.set_xticks([k for k in range(-50, 50, 10)])  # x-axis ticks
    ax.set_yticks([k for k in range(0, 45, 5)])  # y-axis ticks
    ax.set_xlabel('Change in water level (x)')  # x-axis label
    ax.set_ylabel('Water flowing out of the dam (y)')  # y-axis label
    plt.show()


# Compute the regularized cost
def compute_costs(theta, X, y, lamda):
    theta = theta.reshape(theta.shape[0], 1)  # restore theta from 1-D to 2-D
    m = X.shape[0]  # number of samples
    costJ1 = np.sum(np.power(X @ theta - y, 2))  # unregularized part of the cost
    costJ2 = np.sum(np.power(theta[1:, 0], 2)) * lamda  # regularization part (skips theta0)
    return (costJ1 + costJ2) / (2 * m)


# Compute the regularized gradient
def compute_gradient(theta, X, y, lamda):
    theta = theta.reshape(theta.shape[0], 1)  # restore theta from 1-D to 2-D
    m = X.shape[0]
    gradient = np.sum((X @ theta - y) * X, axis=0)  # unregularized part of the gradient
    gradient = gradient.reshape(gradient.shape[0], 1)  # restore to 2-D
    reg = theta * lamda  # regularization part
    reg[0, 0] = 0  # theta0 is not regularized, so its entry is zeroed separately
    return ((gradient + reg) / m).flatten()  # flattened: minimize expects a 1-D gradient


# Fit the linear model
def fit_linear_regression(theta, X, y, lamda):
    # run the optimizer to minimize the cost; x0 is flattened because
    # minimize expects a 1-D starting point
    res = minimize(fun=compute_costs, x0=theta.flatten(), args=(X, y, lamda), method='TNC',
                   jac=compute_gradient, options={'maxiter': 100})
    final_theta = res.x  # optimal theta
    return final_theta


# Plot the fitted linear regression
def plot_linear_regression(final_theta, train_X, train_y):
    px = np.linspace(np.min(train_X[:, 1]), np.max(train_X[:, 1]), 100)  # generate x values
    px = px.reshape(px.shape[0], 1)  # restore to 2-D
    px = np.insert(px, 0, 1, axis=1)  # prepend a column of ones
    py = px @ final_theta  # predicted values
    # scatter plot plus fitted line
    fig, ax = plt.subplots(1, 1)
    ax.scatter(train_X[:, 1], train_y)
    ax.plot(px[:, 1], py)
    ax.set_xticks([k for k in range(-50, 50, 10)])
    ax.set_yticks([k for k in range(-5, 45, 5)])
    ax.set_xlabel('Change in water level (x)')
    ax.set_ylabel('Water flowing out of the dam (y)')
    plt.show()


# Plot the learning curves of the linear model
def plot_linear_learning_curves(train_X, train_y, val_X, val_y, lamda):
    error_train = []  # training-set costs
    error_val = []    # cross-validation costs
    for i in range(0, train_X.shape[0]):  # grow the training set one sample at a time
        theta = np.ones((train_X.shape[1], 1))  # initialize theta
        # fit on the first (i+1) training samples
        theta = fit_linear_regression(theta, train_X[0:i + 1, :], train_y[0:i + 1, :], lamda)
        # training cost on those (i+1) samples; lamda is 0 when measuring error
        train_error = compute_costs(theta, train_X[0:i + 1, :], train_y[0:i + 1, :], 0)
        # validation cost on the whole cross-validation set
        val_error = compute_costs(theta, val_X, val_y, 0)
        error_train.append(train_error)  # record the cost for this training-set size
        error_val.append(val_error)      # record the cost on the whole validation set
    # plot both curves
    fig, ax = plt.subplots(1, 1)
    ax.plot([i for i in range(1, train_X.shape[0] + 1)], error_train, c='blue', label='Train')
    ax.plot([i for i in range(1, train_X.shape[0] + 1)], error_val, c='green', label='Cross Validation')
    ax.set_xticks(np.arange(0, 13, 2))
    ax.set_yticks(np.arange(0, 151, 50))
    plt.legend()
    plt.show()


# Map the feature to higher polynomial powers
def map_polynomial_features(X, p):
    for i in range(2, p + 1):  # powers 2 through p
        X = np.insert(X, X.shape[1], values=np.power(X[:, 1], i), axis=1)
    return X


# Feature scaling (mean normalization)
def feature_normalize(data, d, dataMean, dataStd):
    for i in range(1, d + 1):
        for j in range(0, data.shape[0]):  # every value in column i
            # normalize using the formula from the course
            data[j, i] = (data[j, i] - dataMean[i]) / dataStd[i]
    return data


# Column-wise mean and standard deviation of X
def get_means_stds(X):
    means = np.mean(X, axis=0)  # means
    stds = np.std(X, axis=0)    # standard deviations (column 0 is all ones, but it is never scaled)
    return means, stds


# Fit the polynomial model
def fit_polynomical_regression(theta, train_X, train_y, lamda, d, train_mean, train_std):
    poly_features = map_polynomial_features(train_X, d)  # map the training set to higher powers
    nor_poly_features = feature_normalize(poly_features, d, train_mean, train_std)  # normalize
    theta = np.ones((nor_poly_features.shape[1], 1))  # initialize theta
    final_theta = fit_linear_regression(theta, nor_poly_features, train_y, lamda)  # reuse linear regression
    final_theta = final_theta.reshape(final_theta.shape[0], 1)  # restore to 2-D
    return final_theta


# Plot the fitted polynomial regression
def plot_polynomical_regression(final_theta, train_X, train_y, d, train_mean, train_std):
    x = np.linspace(-70, 60, 100)  # 100 points between -70 and 60
    xx = x.reshape(x.shape[0], 1)  # restore to 2-D
    xx = np.insert(xx, 0, 1, axis=1)  # prepend a column of ones
    xx = map_polynomial_features(xx, d)  # map to higher powers
    # note: normalize with the TRAINING set's mean and std, not statistics of the generated data
    xx = feature_normalize(xx, d, train_mean, train_std)
    yy = xx @ final_theta  # predicted values
    # scatter plot plus fitted curve
    fig, ax = plt.subplots(1, 1)
    ax.scatter(train_X[:, 1], train_y, c='red')
    ax.plot(x, yy.flatten(), c='blue', linestyle='--')
    ax.set_xticks([k for k in range(-80, 81, 20)])
    ax.set_yticks([k for k in range(-60, 41, 10)])
    ax.set_xlabel('Change in water level (x)')
    ax.set_ylabel('Water flowing out of the dam (y)')
    plt.show()


# Plot the learning curves of the polynomial model
def plot_poly_learning_curves(train_X, train_y, val_X, val_y, lamda, d, train_mean, train_std, val_mean, val_std):
    error_train = []  # training-set costs
    error_val = []    # cross-validation costs
    for i in range(0, train_X.shape[0]):  # grow the training set one sample at a time
        theta = np.ones((d + 1, 1))  # initialize theta
        # fit the polynomial model on the first (i+1) samples
        theta = fit_polynomical_regression(theta, train_X[0:i + 1, :], train_y[0:i + 1, :], lamda, d, train_mean, train_std)
        # map the training samples to higher powers
        train_poly_features = map_polynomial_features(train_X, d)
        # normalize with the whole training set's mean and std
        train_nor_poly_features = feature_normalize(train_poly_features, d, train_mean, train_std)
        # training cost on the (i+1) samples
        train_error = compute_costs(theta, train_nor_poly_features[0:i + 1, :], train_y[0:i + 1, :], 0)
        # map the whole cross-validation set to higher powers
        val_poly_features = map_polynomial_features(val_X, d)
        # note: the cross-validation set is also normalized with the TRAINING set's mean and std
        val_nor_poly_features = feature_normalize(val_poly_features, d, train_mean, train_std)
        # validation cost on the whole cross-validation set
        val_error = compute_costs(theta, val_nor_poly_features, val_y, 0)
        error_train.append(train_error)  # record the training cost
        error_val.append(val_error)      # record the validation cost
    # plot both curves
    fig, ax = plt.subplots(1, 1)
    ax.plot([i for i in range(1, train_X.shape[0] + 1)], error_train, c='blue', label='Train')
    ax.plot([i for i in range(1, train_X.shape[0] + 1)], error_val, c='green', label='Cross Validation')
    ax.set_xticks(np.arange(0, 13, 2))
    ax.set_yticks(np.arange(0, 101, 10))
    plt.legend()
    plt.show()


# Plot the training, cross-validation, and test costs as lamda varies
def plot_lamda_curve(theta, train_X, train_y, val_X, val_y, test_X, test_y, d, train_mean, train_std):
    lamda = [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]  # candidate lamda values
    error_train = []  # training-set costs
    error_val = []    # cross-validation costs
    error_test = []   # test-set costs
    for k in lamda:  # try each lamda
        theta = np.ones((d + 1, 1))  # initialize theta
        # fit the polynomial model with this lamda
        theta = fit_polynomical_regression(theta, train_X, train_y, k, d, train_mean, train_std)
        train_poly_features = map_polynomial_features(train_X, d)  # map the training set
        # normalized with the training set's mean and std
        train_nor_poly_features = feature_normalize(train_poly_features, d, train_mean, train_std)
        train_error = compute_costs(theta, train_nor_poly_features, train_y, 0)  # training cost
        val_poly_features = map_polynomial_features(val_X, d)  # map the cross-validation set
        # again normalized with the training set's mean and std
        val_nor_poly_features = feature_normalize(val_poly_features, d, train_mean, train_std)
        val_error = compute_costs(theta, val_nor_poly_features, val_y, 0)  # validation cost
        test_poly_features = map_polynomial_features(test_X, d)  # map the test set
        # again normalized with the training set's mean and std
        test_nor_poly_features = feature_normalize(test_poly_features, d, train_mean, train_std)
        test_error = compute_costs(theta, test_nor_poly_features, test_y, 0)  # test cost
        error_train.append(train_error)  # record the training cost
        error_val.append(val_error)      # record the validation cost
        error_test.append(test_error)    # record the test cost
    # plot the three curves against lamda
    fig, ax = plt.subplots(1, 1)
    ax.plot(lamda, error_train, label='Train', c='b')
    ax.plot(lamda, error_val, label='Cross Validation', c='g')
    ax.plot(lamda, error_test, label='Test', c='r')
    ax.set_xticks(np.arange(0, 11, 1))
    ax.set_yticks(np.arange(0, 21, 2))
    plt.legend()
    plt.show()


d = 6      # polynomial degree
lamda = 0  # regularization parameter
train_X, train_y, test_X, test_y, val_X, val_y = input_data()  # load the three data sets
train_X = np.insert(train_X, 0, 1, axis=1)  # prepend a column of ones to the training set
test_X = np.insert(test_X, 0, 1, axis=1)    # prepend a column of ones to the test set
val_X = np.insert(val_X, 0, 1, axis=1)      # prepend a column of ones to the cross-validation set

train_poly_X = map_polynomial_features(train_X, d)  # map the training set to higher powers
test_poly_X = map_polynomial_features(test_X, d)    # map the test set to higher powers
val_poly_X = map_polynomial_features(val_X, d)      # map the cross-validation set to higher powers

train_mean, train_std = get_means_stds(train_poly_X)  # mean/std of the mapped training set
test_mean, test_std = get_means_stds(test_poly_X)     # mean/std of the mapped test set
val_mean, val_std = get_means_stds(val_poly_X)        # mean/std of the mapped cross-validation set

theta = np.ones((2, 1))  # initialize theta for simple linear regression
# fit the linear model
final_theta = fit_linear_regression(theta, train_X, train_y, lamda)
# plot the fitted line
plot_linear_regression(final_theta, train_X, train_y)
# plot the learning curves of the linear model
plot_linear_learning_curves(train_X, train_y, val_X, val_y, lamda)
# fit the polynomial model
final_theta = fit_polynomical_regression(theta, train_X, train_y, lamda, d, train_mean, train_std)
# plot the fitted polynomial curve
plot_polynomical_regression(final_theta, train_X, train_y, d, train_mean, train_std)
# plot the learning curves of the polynomial model
plot_poly_learning_curves(train_X, train_y, val_X, val_y, lamda, d, train_mean, train_std, val_mean, val_std)
# plot the cost of each data set as lamda varies
plot_lamda_curve(theta, train_X, train_y, val_X, val_y, test_X, test_y, d, train_mean, train_std)
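One implementation note: feature_normalize above scales each value with an explicit double loop, which is faithful to the course formula but slow in Python. A vectorized equivalent (a sketch of an alternative, not part of the original script) is:

# Vectorized alternative to feature_normalize: same in-place effect,
# normalizing columns 1..d and leaving the bias column (column 0) untouched.
def feature_normalize_vectorized(data, d, dataMean, dataStd):
    data[:, 1:d + 1] = (data[:, 1:d + 1] - dataMean[1:d + 1]) / dataStd[1:d + 1]
    return data

NumPy broadcasting applies the per-column mean and standard deviation to every row at once, so the result matches the loop version.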
Results (the figures produced by the script are omitted here):

Simple linear regression fit
Learning curves of the linear model
Polynomial regression fit
Learning curves of the polynomial model
Effect of varying lamda on the cost of each data set
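plot_lamda_curve only visualizes the three costs; the usual next step in this exercise is to pick the lamda with the lowest cross-validation cost and report the test cost under it. A minimal sketch reusing the functions defined above (the variable names here are mine, not the original author's):

lamdas = [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]
val_errors = []
for k in lamdas:
    # refit the polynomial model for each candidate lamda
    t = fit_polynomical_regression(theta, train_X, train_y, k, d, train_mean, train_std)
    # cross-validation cost, computed without regularization
    val_poly = feature_normalize(map_polynomial_features(val_X, d), d, train_mean, train_std)
    val_errors.append(compute_costs(t, val_poly, val_y, 0))
best_lamda = lamdas[int(np.argmin(val_errors))]
# test cost under the selected lamda
t = fit_polynomical_regression(theta, train_X, train_y, best_lamda, d, train_mean, train_std)
test_poly = feature_normalize(map_polynomial_features(test_X, d), d, train_mean, train_std)
print('best lamda:', best_lamda, 'test cost:', compute_costs(t, test_poly, test_y, 0))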