import numpy as np
import h5py
import matplotlib.pyplot as plt
from testCases_v2 import*from dnn_utils_v2 import sigmoid, sigmoid_backward, relu, relu_backward%matplotlib inline
plt.rcParams['figure.figsize']=(5.0,4.0)# set default size of plots
plt.rcParams['image.interpolation']='nearest'
plt.rcParams['image.cmap']='gray'%load_ext autoreload
%autoreload 2np.random.seed(1)
2. 算法主要流程
3. 初始化
第4節筆記:01.神經網絡和深度學習 W4.深層神經網絡
3.1 兩層神經網絡
模型結構:LINEAR -> RELU -> LINEAR -> SIGMOID 權重:np.random.randn(shape)*0.01 偏置:np.zeros(shape)
# GRADED FUNCTION: initialize_parameters
def initialize_parameters(n_x, n_h, n_y):
    """Create the initial parameters of a two-layer network.

    Weights are sampled with np.random.randn and scaled by 0.01; biases start
    at zero. NumPy's global RNG is re-seeded with 1 on every call, so repeated
    calls with the same sizes return identical values (and the global random
    state is reset as a side effect).

    Arguments:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    parameters -- python dictionary containing:
                    W1 -- weight matrix of shape (n_h, n_x)
                    b1 -- bias vector of shape (n_h, 1)
                    W2 -- weight matrix of shape (n_y, n_h)
                    b2 -- bias vector of shape (n_y, 1)
    """
    np.random.seed(1)

    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    # Internal sanity checks on the shapes just built.
    assert W1.shape == (n_h, n_x)
    assert b1.shape == (n_h, 1)
    assert W2.shape == (n_y, n_h)
    assert b2.shape == (n_y, 1)

    return {"W1": W1, "b1": b1, "W2": W2, "b2": b2}
3.2 多層神經網絡
模型結構:[LINEAR -> RELU] × (L-1) -> LINEAR -> SIGMOID
# GRADED FUNCTION: initialize_parameters_deep
def initialize_parameters_deep(layer_dims):
    """Create the initial parameters of an L-layer network.

    For every layer l = 1 .. len(layer_dims) - 1 the weights are sampled with
    np.random.randn and scaled by 0.01, and the biases are zero. NumPy's
    global RNG is re-seeded with 3 on every call, so the result is
    deterministic for a given layer_dims.

    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer
                  in the network, input layer first

    Returns:
    parameters -- python dictionary containing "W1", "b1", ..., "WL", "bL":
                    Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                    bl -- bias vector of shape (layer_dims[l], 1)
    """
    np.random.seed(3)
    parameters = {}
    num_entries = len(layer_dims)  # input layer + every weight layer

    for layer in range(1, num_entries):
        fan_out, fan_in = layer_dims[layer], layer_dims[layer - 1]
        parameters['W' + str(layer)] = np.random.randn(fan_out, fan_in) * 0.01
        parameters['b' + str(layer)] = np.zeros((fan_out, 1))

        # Internal sanity checks on the shapes just built.
        assert parameters['W' + str(layer)].shape == (fan_out, fan_in)
        assert parameters['b' + str(layer)].shape == (fan_out, 1)

    return parameters
# GRADED FUNCTION: linear_forward
def linear_forward(A, W, b):
    """Implement the linear part of a layer's forward propagation: Z = W.A + b.

    Arguments:
    A -- activations from previous layer (or input data):
         (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape
         (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)

    Returns:
    Z -- the input of the activation function, also called pre-activation
         parameter
    cache -- the tuple (A, W, b), stored for computing the backward pass
    """
    Z = np.dot(W, A) + b

    # Internal sanity check: one row per unit, one column per example.
    assert Z.shape == (W.shape[0], A.shape[1])

    cache = (A, W, b)
    return Z, cache
# GRADED FUNCTION: linear_activation_forward
def linear_activation_forward(A_prev, W, b, activation):
    """Implement the forward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    A_prev -- activations from previous layer (or input data):
              (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape
         (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text
                  string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the
         post-activation value
    cache -- the tuple (linear_cache, activation_cache), stored for computing
             the backward pass; linear_cache comes from linear_forward and
             activation_cache is whatever the activation helper returns

    Raises:
    ValueError -- if activation is neither "sigmoid" nor "relu"
    """
    # Pick the helper first so an unsupported name fails with a clear message
    # instead of an unbound-variable error further down.
    if activation == "sigmoid":
        activation_fn = sigmoid
    elif activation == "relu":
        activation_fn = relu
    else:
        raise ValueError(
            'activation must be "sigmoid" or "relu", got {!r}'.format(activation)
        )

    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = activation_fn(Z)

    # Internal sanity check: the activation must not change the shape.
    assert A.shape == (W.shape[0], A_prev.shape[1])

    cache = (linear_cache, activation_cache)
    return A, cache
4.3 多層模型
前面使用 $L-1$ 層 ReLU,最後使用 1 層 Sigmoid
# GRADED FUNCTION: L_model_forward
def L_model_forward(X, parameters):
    """Implement forward propagation for [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID.

    Arguments:
    X -- data, numpy array of shape (input size, number of examples)
    parameters -- dictionary holding "W1", "b1", ..., "WL", "bL"
                  (the output of initialize_parameters_deep())

    Returns:
    AL -- last post-activation value, asserted to have shape
          (1, number of examples)
    caches -- list of the caches returned by linear_activation_forward():
              indices 0 .. L-2 come from the "relu" layers, index L-1 from
              the final "sigmoid" layer
    """
    caches = []
    A = X
    L = len(parameters) // 2  # each layer contributes one W and one b

    # Hidden layers: [LINEAR -> RELU] * (L-1).
    for layer in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(
            A_prev, parameters['W' + str(layer)], parameters['b' + str(layer)], 'relu'
        )
        caches.append(cache)  # (linear_cache, activation_cache) of this layer

    # Output layer: LINEAR -> SIGMOID.
    AL, cache = linear_activation_forward(
        A, parameters['W' + str(L)], parameters['b' + str(L)], 'sigmoid'
    )
    caches.append(cache)

    # Internal sanity check: a single output unit per example.
    assert AL.shape == (1, X.shape[1])

    return AL, caches
# GRADED FUNCTION: compute_cost
def compute_cost(AL, Y):
    """Compute the cross-entropy cost.

    cost = -1/m * sum(Y * log(AL) + (1 - Y) * log(1 - AL))

    Arguments:
    AL -- probability vector corresponding to the label predictions,
          shape (1, number of examples)
    Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat),
         shape (1, number of examples)

    Returns:
    cost -- cross-entropy cost, as a zero-dimensional numpy value
    """
    m = Y.shape[1]

    # Keep probabilities strictly inside (0, 1): with an exact 0 or 1 the
    # unclipped formula evaluates 0 * log(0) and the cost becomes nan.
    # Values that are not saturated are left untouched by the clip.
    eps = 1e-15
    probs = np.clip(np.asarray(AL, dtype=np.float64), eps, 1 - eps)

    cost = np.sum(Y * np.log(probs) + (1 - Y) * np.log(1 - probs)) / (-m)

    # Make sure the cost is a scalar (e.g. this turns [[17]] into 17).
    cost = np.squeeze(cost)
    assert cost.shape == ()

    return cost
# GRADED FUNCTION: linear_backward
def linear_backward(dZ, cache):
    """Implement the linear portion of backward propagation for one layer l.

    Arguments:
    dZ -- Gradient of the cost with respect to the linear output
          (of current layer l)
    cache -- tuple of values (A_prev, W, b) coming from the forward
             propagation in the current layer

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation
               (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l),
          same shape as W
    db -- Gradient of the cost with respect to b (current layer l),
          same shape as b
    """
    A_prev, W, b = cache
    m = A_prev.shape[1]  # number of examples (columns of A_prev)

    dW = np.dot(dZ, A_prev.T) / m
    db = 1 / m * np.sum(dZ, axis=1, keepdims=True)  # keepdims keeps the (n, 1) shape
    dA_prev = np.dot(W.T, dZ)

    # Internal sanity checks: every gradient matches its variable's shape.
    assert dA_prev.shape == A_prev.shape
    assert dW.shape == W.shape
    assert db.shape == b.shape

    return dA_prev, dW, db
# GRADED FUNCTION: linear_activation_backward
def linear_activation_backward(dA, cache, activation):
    """Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) stored during
             the forward pass
    activation -- the activation used in this layer, stored as a text string:
                  "sigmoid" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation
               (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l),
          same shape as W
    db -- Gradient of the cost with respect to b (current layer l),
          same shape as b

    Raises:
    ValueError -- if activation is neither "sigmoid" nor "relu"
    """
    # Pick the helper first so an unsupported name fails with a clear message
    # instead of an unbound-variable error at the return statement.
    if activation == "relu":
        backward_fn = relu_backward
    elif activation == "sigmoid":
        backward_fn = sigmoid_backward
    else:
        raise ValueError(
            'activation must be "sigmoid" or "relu", got {!r}'.format(activation)
        )

    linear_cache, activation_cache = cache

    dZ = backward_fn(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
# GRADED FUNCTION: L_model_backward
def L_model_backward(AL, Y, caches):
    """Implement backward propagation for [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID.

    Arguments:
    AL -- probability vector, output of the forward propagation
          (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat); it is
         reshaped to AL's shape before use
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu"
                (it's caches[l], for l in range(L-1) i.e l = 0...L-2)
                the cache of linear_activation_forward() with "sigmoid"
                (it's caches[L-1])

    Returns:
    grads -- A dictionary with the gradients
             grads["dW" + str(l)], grads["db" + str(l)] -- gradients of the
                 parameters of layer l, for l = 1 .. L
             grads["dA" + str(l)] -- the dA_prev value returned while
                 back-propagating through layer l, i.e. the gradient with
                 respect to the activations feeding layer l. The key is
                 offset by one on purpose; the loop below reads it back as
                 "dA" + str(l + 2).
    """
    grads = {}
    L = len(caches)  # the number of layers
    Y = Y.reshape(AL.shape)  # after this line, Y is the same shape as AL

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = -np.divide(Y, AL) + np.divide(1 - Y, 1 - AL)

    # Output layer (SIGMOID -> LINEAR).
    current_cache = caches[L - 1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = (
        linear_activation_backward(dAL, current_cache, 'sigmoid')
    )

    # Hidden layers (RELU -> LINEAR), last to first: l = L-2 .. 0.
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(
            grads['dA' + str(l + 2)], current_cache, 'relu'
        )
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads
# GRADED FUNCTION: update_parameters
def update_parameters(parameters, grads, learning_rate):
    """Update parameters using one step of gradient descent.

    The entries of the given dictionary are replaced in place (each with a
    newly computed array) and the same dictionary object is returned.

    Arguments:
    parameters -- python dictionary containing "W1", "b1", ..., "WL", "bL"
    grads -- python dictionary containing the gradients "dW1", "db1", ...,
             output of L_model_backward
    learning_rate -- step size of the update

    Returns:
    parameters -- python dictionary containing the updated parameters
                  parameters["W" + str(l)] = ...
                  parameters["b" + str(l)] = ...
    """
    L = len(parameters) // 2  # each layer contributes one W and one b

    for layer in range(1, L + 1):
        w_key, b_key = "W" + str(layer), "b" + str(layer)
        parameters[w_key] = parameters[w_key] - learning_rate * grads["d" + w_key]
        parameters[b_key] = parameters[b_key] - learning_rate * grads["d" + b_key]

    return parameters
作業2. 深度神經網絡應用:圖像分類
使用上面的函數,建立深度神經網絡,并對圖片是不是貓進行預測。
1. 導入包
import time
import numpy as np
import h5py
import matplotlib.pyplot as plt
import scipy
from PIL import Image
from scipy import ndimage
from dnn_app_utils_v2 import*%matplotlib inline
plt.rcParams['figure.figsize']=(5.0,4.0)# set default size of plots
plt.rcParams['image.interpolation']='nearest'
plt.rcParams['image.cmap']='gray'%load_ext autoreload
%autoreload 2np.random.seed(1)
# Example of a picture: show one training image together with its label.
index = 1
plt.imshow(train_x_orig[index])
print("y = " + str(train_y[0, index]) + ". It's a " + classes[train_y[0, index]].decode("utf-8") + " picture.")
查看數據大小
# Explore your dataset: example counts come from axis 0, the image side
# length from axis 1 of the raw image arrays.
m_train = train_x_orig.shape[0]
num_px = train_x_orig.shape[1]
m_test = test_x_orig.shape[0]

print("Number of training examples: " + str(m_train))
print("Number of testing examples: " + str(m_test))
print("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ", 3)")
print("train_x_orig shape: " + str(train_x_orig.shape))
print("train_y shape: " + str(train_y.shape))
print("test_x_orig shape: " + str(test_x_orig.shape))
print("test_y shape: " + str(test_y.shape))
Number of training examples:209
Number of testing examples:50
Each image is of size:(64,64,3)
train_x_orig shape:(209,64,64,3)
train_y shape:(1,209)
test_x_orig shape:(50,64,64,3)
test_y shape:(1,50)
圖片數據向量化
# Reshape the training and test examples: one column per image.
# The "-1" makes reshape flatten the remaining dimensions.
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

# Standardize data to have feature values between 0 and 1.
train_x = train_x_flatten / 255.
test_x = test_x_flatten / 255.

print("train_x's shape: " + str(train_x.shape))
print("test_x's shape: " + str(test_x.shape))
train_x's shape:(12288,209)# 12288 = 64 * 64 * 3
test_x's shape:(12288,50)
3. 建立模型
3.1 兩層神經網絡
3.2 多層神經網絡
3.3 一般步驟
初始化參數 / 定義超參數
n_iters次 迭代循環: – a. 正向傳播 – b. 計算成本函數 – c. 反向傳播 – d. 更新參數(使用參數、梯度)
# GRADED FUNCTION: two_layer_model
def two_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):
    """Train a two-layer neural network: LINEAR->RELU->LINEAR->SIGMOID.

    Runs num_iterations steps of gradient descent, then plots the cost
    recorded every 100 iterations with matplotlib.

    Arguments:
    X -- input data, of shape (n_x, number of examples)
    Y -- true "label" vector (containing 1 if cat, 0 if non-cat),
         of shape (1, number of examples)
    layers_dims -- dimensions of the layers (n_x, n_h, n_y)
    learning_rate -- learning rate of the gradient descent update rule
    num_iterations -- number of iterations of the optimization loop
    print_cost -- If set to True, this will print the cost every 100 iterations

    Returns:
    parameters -- a dictionary containing W1, W2, b1, and b2
    """
    np.random.seed(1)
    grads = {}
    costs = []  # cost recorded every 100 iterations, used for the plot
    (n_x, n_h, n_y) = layers_dims

    parameters = initialize_parameters(n_x, n_h, n_y)

    # Get W1, b1, W2 and b2 from the dictionary parameters.
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    # Loop (gradient descent)
    for i in range(0, num_iterations):
        # Forward propagation: LINEAR -> RELU -> LINEAR -> SIGMOID.
        A1, cache1 = linear_activation_forward(X, W1, b1, 'relu')
        A2, cache2 = linear_activation_forward(A1, W2, b2, 'sigmoid')

        cost = compute_cost(A2, Y)

        # Derivative of the cross-entropy cost with respect to A2.
        dA2 = -np.divide(Y, A2) + np.divide(1 - Y, 1 - A2)

        # Backward propagation; the gradient with respect to the input
        # (first return value of the relu layer) is not needed.
        dA1, dW2, db2 = linear_activation_backward(dA2, cache2, 'sigmoid')
        _, dW1, db1 = linear_activation_backward(dA1, cache1, 'relu')

        grads['dW1'] = dW1
        grads['db1'] = db1
        grads['dW2'] = dW2
        grads['db2'] = db2

        parameters = update_parameters(parameters, grads, learning_rate)

        # Retrieve W1, b1, W2, b2 from parameters
        W1 = parameters["W1"]
        b1 = parameters["b1"]
        W2 = parameters["W2"]
        b2 = parameters["b2"]

        # Record the cost every 100 iterations whether or not it is printed,
        # so the plot below is never empty when print_cost is False.
        if i % 100 == 0:
            if print_cost:
                print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
            costs.append(cost)

    # plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')  # one sample per 100 iterations
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    return parameters
訓練
# Train the two-layer model for 2500 iterations, printing the cost as it goes.
parameters = two_layer_model(
    train_x,
    train_y,
    layers_dims=(n_x, n_h, n_y),
    num_iterations=2500,
    print_cost=True,
)
Cost after iteration 0:0.693049735659989
Cost after iteration 100:0.6464320953428849
Cost after iteration 200:0.6325140647912678
Cost after iteration 300:0.6015024920354665
Cost after iteration 400:0.5601966311605747
Cost after iteration 500:0.5158304772764729
Cost after iteration 600:0.4754901313943325
Cost after iteration 700:0.43391631512257495
Cost after iteration 800:0.4007977536203887
Cost after iteration 900:0.35807050113237976
Cost after iteration 1000:0.33942815383664127
Cost after iteration 1100:0.30527536361962654
Cost after iteration 1200:0.2749137728213016
Cost after iteration 1300:0.24681768210614846
Cost after iteration 1400:0.19850735037466097
Cost after iteration 1500:0.17448318112556657
Cost after iteration 1600:0.1708076297809689
Cost after iteration 1700:0.11306524562164715
Cost after iteration 1800:0.09629426845937145
Cost after iteration 1900:0.08342617959726863
Cost after iteration 2000:0.07439078704319078
Cost after iteration 2100:0.06630748132267933
Cost after iteration 2200:0.0591932950103817
Cost after iteration 2300:0.05336140348560554
Cost after iteration 2400:0.04855478562877016
### CONSTANTS ###
# Layer sizes, input first: 12288 input features, then layers of 20, 7, 5
# and 1 units -- five entries, i.e. four weight layers.
layers_dims = [
    12288,
    20,
    7,
    5,
    1,
]
組件模型
# GRADED FUNCTION: L_layer_model
def L_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):  # lr was 0.009
    """Train an L-layer neural network: [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID.

    Runs num_iterations steps of gradient descent, then plots the cost
    recorded every 100 iterations with matplotlib.

    Arguments:
    X -- data, numpy array of shape (num_px * num_px * 3, number of examples),
         i.e. one column per example as expected by L_model_forward
    Y -- true "label" vector (containing 1 if cat, 0 if non-cat),
         of shape (1, number of examples)
    layers_dims -- list containing the input size and each layer size,
                   of length (number of layers + 1)
    learning_rate -- learning rate of the gradient descent update rule
    num_iterations -- number of iterations of the optimization loop
    print_cost -- if True, it prints the cost every 100 steps

    Returns:
    parameters -- parameters learnt by the model. They can then be used to
                  predict.
    """
    np.random.seed(1)
    costs = []  # cost recorded every 100 iterations, used for the plot

    parameters = initialize_parameters_deep(layers_dims)

    # Loop (gradient descent)
    for i in range(0, num_iterations):
        # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> SIGMOID.
        AL, caches = L_model_forward(X, parameters)

        cost = compute_cost(AL, Y)

        grads = L_model_backward(AL, Y, caches)

        parameters = update_parameters(parameters, grads, learning_rate)

        # Record the cost every 100 iterations whether or not it is printed,
        # so the plot below is never empty when print_cost is False.
        if i % 100 == 0:
            if print_cost:
                print("Cost after iteration %i: %f" % (i, cost))
            costs.append(cost)

    # plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')  # one sample per 100 iterations
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    return parameters
訓練
# Train the deep model described by layers_dims for 2500 iterations,
# printing the cost as it goes.
parameters = L_layer_model(
    train_x,
    train_y,
    layers_dims,
    num_iterations=2500,
    print_cost=True,
)
Cost after iteration 0:0.771749
Cost after iteration 100:0.672053
Cost after iteration 200:0.648263
Cost after iteration 300:0.611507
Cost after iteration 400:0.567047
Cost after iteration 500:0.540138
Cost after iteration 600:0.527930
Cost after iteration 700:0.465477
Cost after iteration 800:0.369126
Cost after iteration 900:0.391747
Cost after iteration 1000:0.315187
Cost after iteration 1100:0.272700
Cost after iteration 1200:0.237419
Cost after iteration 1300:0.199601
Cost after iteration 1400:0.189263
Cost after iteration 1500:0.161189
Cost after iteration 1600:0.148214
Cost after iteration 1700:0.137775
Cost after iteration 1800:0.129740
Cost after iteration 1900:0.121225
Cost after iteration 2000:0.113821
Cost after iteration 2100:0.107839
Cost after iteration 2200:0.102855
Cost after iteration 2300:0.100897
Cost after iteration 2400:0.092878
def print_mislabeled_images(classes, X, y, p, num_px=64):
    """Plot the images whose prediction differs from the true label.

    As a side effect the global matplotlib default figure size is set to
    (40.0, 40.0).

    Arguments:
    classes -- indexable collection of byte-string class names, indexed by
               label value
    X -- dataset, one flattened image per column
    y -- true labels, indexed as y[0, index]
    p -- predictions, indexed as p[0, index]
    num_px -- side length used to reshape a column back into a
              (num_px, num_px, 3) image; defaults to 64
    """
    # With 0/1 labels the sum is 1 exactly when prediction and truth differ
    # (0 + 1 or 1 + 0).
    label_sum = p + y
    mislabeled_indices = np.asarray(np.where(label_sum == 1))

    plt.rcParams['figure.figsize'] = (40.0, 40.0)  # set default size of plots
    num_images = len(mislabeled_indices[0])

    # Row 1 of np.where's result holds the column (example) indices.
    for position, index in enumerate(mislabeled_indices[1], start=1):
        plt.subplot(2, num_images, position)
        plt.imshow(X[:, index].reshape(num_px, num_px, 3), interpolation='nearest')
        plt.axis('off')
        plt.title(
            "Prediction: " + classes[int(p[0, index])].decode("utf-8")
            + " \n Class: " + classes[y[0, index]].decode("utf-8")
        )


print_mislabeled_images(classes, test_x, test_y, pred_test)
錯誤特點:
貓的身體在一個不尋常的位置
貓出現在一個相似顏色的背景下
不常見的貓顏色和種類
照相機角度
圖片的亮度
大小程度(貓在圖像中非常大或很小)
7. 用自己的圖片測試
## START CODE HERE ##
my_image = "my_image.jpg"  # change this to the name of your image file
my_label_y = [1]  # the true class of your image (1 -> cat, 0 -> non-cat)
## END CODE HERE ##

fname = "images/" + my_image
image = Image.open(fname)

# Force three colour channels so the flattened vector always has
# num_px * num_px * 3 entries (grayscale or RGBA files would otherwise break
# the reshape), and scale to [0, 1] exactly like train_x / test_x were scaled.
my_image = np.array(image.convert("RGB").resize((num_px, num_px))).reshape((num_px * num_px * 3, 1)) / 255.
my_predicted_image = predict(my_image, my_label_y, parameters)

plt.imshow(image)
predicted_class = int(np.squeeze(my_predicted_image))
print("y = " + str(np.squeeze(my_predicted_image)) + ", your L-layer model predicts a \"" + classes[predicted_class].decode("utf-8") + "\" picture.")
Accuracy:1.0
y =1.0, your L-layer model predicts a "cat" picture.