Deep Learning Notes (6): BatchNorm (Batch Normalization)
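Batch normalization standardizes each feature of a layer's input using the mean and variance of the current mini-batch, then rescales the result with learnable parameters gamma (scale) and beta (shift). During training it also keeps running estimates of the mean and variance, which replace the batch statistics at inference time. The network below uses a ready-made BatchNorm1D layer from the small NumPy framework built up in earlier notes; as a reference for what such a layer computes, here is a minimal, self-contained sketch of the training-time forward pass. The function and parameter names (batchnorm_forward, gamma, beta, momentum, eps) are illustrative only and are not taken from that framework.

import numpy as np

def batchnorm_forward(X, gamma, beta, running_mean, running_var,
                      momentum=0.9, eps=1e-5):
    """Minimal sketch of a 1D BatchNorm forward pass in training mode.

    X: (batch_size, n_features) input
    gamma, beta: learnable scale and shift, shape (n_features,)
    running_mean, running_var: running statistics used at inference time
    """
    mu = X.mean(axis=0)                    # per-feature batch mean
    var = X.var(axis=0)                    # per-feature batch variance
    X_hat = (X - mu) / np.sqrt(var + eps)  # normalize each feature
    out = gamma * X_hat + beta             # scale and shift

    # update the running statistics (used instead of batch statistics at test time)
    running_mean = momentum * running_mean + (1 - momentum) * mu
    running_var = momentum * running_var + (1 - momentum) * var
    return out, running_mean, running_var

# tiny usage example
X = np.random.randn(8, 4) * 3 + 5
gamma, beta = np.ones(4), np.zeros(4)
out, rm, rv = batchnorm_forward(X, gamma, beta, np.zeros(4), np.ones(4))
print(out.mean(axis=0), out.std(axis=0))  # roughly 0 and 1 per feature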
Code implementation:
Data:
import numpy as np

# load_data() is assumed to return an MNIST-style (image, label) split, as in the earlier notes of this series
(X_train, y_train), (X_test, y_test) = load_data()

# one-hot encode the integer labels
y_train = np.eye(10)[y_train.astype(int)]
y_test = np.eye(10)[y_test.astype(int)]

# flatten each image into a single feature vector
X_train = X_train.reshape(-1, X_train.shape[1] * X_train.shape[2]).astype('float32')
X_test = X_test.reshape(-1, X_test.shape[1] * X_test.shape[2]).astype('float32')
print(X_train.shape, y_train.shape)

N = 20000  # take 20000 samples for training
indices = np.random.permutation(range(X_train.shape[0]))[:N]
X_train, y_train = X_train[indices], y_train[indices]
print(X_train.shape, y_train.shape)

# rescale pixel values from [0, 255] to [-1, 1]
X_train /= 255
X_train = (X_train - 0.5) * 2
X_test /= 255
X_test = (X_test - 0.5) * 2
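Two details of the preprocessing are worth spelling out: np.eye(10)[labels] turns integer class labels into one-hot rows by selecting rows of the 10x10 identity matrix, and the last four lines map pixel values from [0, 255] to [0, 1] and then to [-1, 1]. A tiny standalone illustration of the one-hot trick (the labels array here is made up for the example):

import numpy as np

labels = np.array([3, 0, 7])
one_hot = np.eye(10)[labels]   # row `label` of the 10x10 identity matrix
print(one_hot.shape)           # (3, 10)
print(one_hot[0])              # [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]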
Define a fully connected 2-layer network with a BatchNorm layer:

from collections import OrderedDict
import time

class DFN2(object):
    def __init__(self,
                 hidden_dims_1=None,
                 hidden_dims_2=None,
                 optimizer="sgd(lr=0.01)",
                 init_w="std_normal",
                 loss=CrossEntropy()):
        self.optimizer = optimizer
        self.init_w = init_w
        self.loss = loss
        self.hidden_dims_1 = hidden_dims_1
        self.hidden_dims_2 = hidden_dims_2
        self.is_initialized = False

    def _set_params(self):
        """
        Initialize the model:
        FC1 -> Sigmoid -> BN -> FC2 -> Softmax
        """
        self.layers = OrderedDict()
        self.layers["FC1"] = FullyConnected(n_out=self.hidden_dims_1,
                                            acti_fn="sigmoid",
                                            init_w=self.init_w,
                                            optimizer=self.optimizer)
        self.layers["BN"] = BatchNorm1D(optimizer=self.optimizer)
        self.layers["FC2"] = FullyConnected(n_out=self.hidden_dims_2,
                                            acti_fn="affine(slope=1,intercept=0)",
                                            init_w=self.init_w,
                                            optimizer=self.optimizer)
        self.layers["Softmax"] = Softmax(dim=-1, optimizer=self.optimizer)
        self.softmax = Softmax(dim=-1, optimizer=self.optimizer)  # kept from the original post; not used below
        self.is_initialized = True

    def forward(self, X_train):
        Xs = {}
        out = X_train
        for k, v in self.layers.items():
            Xs[k] = out  # input to each layer
            out = v.forward(out)
        return out, Xs

    def backward(self, grad):
        dXs = {}
        out = grad
        for k, v in reversed(list(self.layers.items())):
            dXs[k] = out
            out = v.backward(out)
        return out, dXs

    def update(self):
        """Apply one gradient update."""
        for k, v in reversed(list(self.layers.items())):
            v.update()
        self.flush_gradients()  # reset the gradients after updating

    def flush_gradients(self, curr_loss=None):
        """Reset the gradients after an update."""
        for k, v in self.layers.items():
            v.flush_gradients()

    def fit(self, X_train, y_train, n_epochs=20, batch_size=64, verbose=False):
        """
        Parameters:
        X_train: training data
        y_train: training labels
        n_epochs: number of epochs
        batch_size: batch size used in every epoch
        verbose: whether to print the loss of every batch
        """
        self.verbose = verbose
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        if not self.is_initialized:
            self.n_features = X_train.shape[1]
            self._set_params()
        prev_loss = np.inf
        for i in range(n_epochs):
            loss, epoch_start = 0.0, time.time()
            batch_generator, n_batch = minibatch(X_train, self.batch_size, shuffle=True)
            for j, batch_idx in enumerate(batch_generator):
                batch_len, batch_start = len(batch_idx), time.time()
                X_batch, y_batch = X_train[batch_idx], y_train[batch_idx]
                out, _ = self.forward(X_batch)
                y_pred_batch = out  # Softmax is already the last layer, so its output is the prediction
                batch_loss = self.loss(y_batch, y_pred_batch)
                grad = self.loss.grad(y_batch, y_pred_batch)  # gradient of the cross-entropy loss
                _, _ = self.backward(grad)  # backpropagation: compute the gradients
                self.update()  # update the parameters, then reset the gradients
                loss += batch_loss  # accumulate the loss
                if self.verbose:
                    fstr = "\t[Batch {}/{}] Train loss: {:.3f} ({:.1f}s/batch)"
                    print(fstr.format(j + 1, n_batch, batch_loss, time.time() - batch_start))
            loss /= n_batch
            fstr = "[Epoch {}] Avg. loss: {:.3f} Delta: {:.3f} ({:.2f}m/epoch)"
            print(fstr.format(i + 1, loss, prev_loss - loss, (time.time() - epoch_start) / 60.0))
            prev_loss = loss

    def evaluate(self, X_test, y_test, batch_size=128):
        acc = 0.0
        batch_generator, n_batch = minibatch(X_test, batch_size, shuffle=True)
        for j, batch_idx in enumerate(batch_generator):
            batch_len, batch_start = len(batch_idx), time.time()
            X_batch, y_batch = X_test[batch_idx], y_test[batch_idx]
            y_pred_batch, _ = self.forward(X_batch)
            y_pred_batch = np.argmax(y_pred_batch, axis=1)
            y_batch = np.argmax(y_batch, axis=1)
            acc += np.sum(y_pred_batch == y_batch)
        return acc / X_test.shape[0]

    @property
    def hyperparams(self):
        return {
            "init_w": self.init_w,
            "loss": str(self.loss),
            "optimizer": self.optimizer,
            "hidden_dims_1": self.hidden_dims_1,
            "hidden_dims_2": self.hidden_dims_2,
            "components": {k: v.params for k, v in self.layers.items()},
        }
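The fit and evaluate methods above rely on a minibatch helper that is not defined in this post (it belongs to the same NumPy framework as FullyConnected and BatchNorm1D). Judging only from how it is called, it returns a generator of index arrays together with the number of batches; a minimal stand-in with that behaviour might look like the sketch below, though the framework's own implementation may differ.

import numpy as np

def minibatch(X, batch_size=64, shuffle=True):
    """Yield arrays of row indices covering X in chunks of batch_size."""
    N = X.shape[0]
    idx = np.arange(N)
    if shuffle:
        np.random.shuffle(idx)
    n_batch = int(np.ceil(N / batch_size))

    def generator():
        for i in range(n_batch):
            yield idx[i * batch_size:(i + 1) * batch_size]

    return generator(), n_batch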
""" 訓練""" # model = DFN(hidden_dims_1=200, hidden_dims_2=10) # model.fit(X_train, y_train, n_epochs=20) # print(model.evaluate(X_test,y_test)) #model = DFN2(hidden_dims_1=200, hidden_dims_2=10) model.fit(X_train, y_train, n_epochs=20, batch_size=64)print("accuracy:{}".format(model.evaluate(X_test, y_test)))結果:
Results:

[Epoch 1] Avg. loss: 1.863 Delta: inf (0.03m/epoch)
[Epoch 2] Avg. loss: 1.164 Delta: 0.699 (0.03m/epoch)
[Epoch 3] Avg. loss: 0.830 Delta: 0.334 (0.03m/epoch)
[Epoch 4] Avg. loss: 0.654 Delta: 0.176 (0.03m/epoch)
[Epoch 5] Avg. loss: 0.558 Delta: 0.095 (0.03m/epoch)
[Epoch 6] Avg. loss: 0.504 Delta: 0.055 (0.03m/epoch)
[Epoch 7] Avg. loss: 0.466 Delta: 0.038 (0.03m/epoch)
[Epoch 8] Avg. loss: 0.442 Delta: 0.024 (0.03m/epoch)
[Epoch 9] Avg. loss: 0.422 Delta: 0.021 (0.03m/epoch)
[Epoch 10] Avg. loss: 0.407 Delta: 0.014 (0.03m/epoch)
[Epoch 11] Avg. loss: 0.397 Delta: 0.010 (0.03m/epoch)
[Epoch 12] Avg. loss: 0.384 Delta: 0.013 (0.03m/epoch)
[Epoch 13] Avg. loss: 0.376 Delta: 0.009 (0.03m/epoch)
[Epoch 14] Avg. loss: 0.370 Delta: 0.005 (0.03m/epoch)
[Epoch 15] Avg. loss: 0.363 Delta: 0.008 (0.03m/epoch)
[Epoch 16] Avg. loss: 0.356 Delta: 0.007 (0.03m/epoch)
[Epoch 17] Avg. loss: 0.350 Delta: 0.006 (0.03m/epoch)
[Epoch 18] Avg. loss: 0.345 Delta: 0.005 (0.03m/epoch)
[Epoch 19] Avg. loss: 0.339 Delta: 0.006 (0.03m/epoch)
[Epoch 20] Avg. loss: 0.339 Delta: 0.000 (0.03m/epoch)
accuracy:0.9131

Summary
With a BatchNorm layer inserted between the two fully connected layers, the average training loss falls from 1.863 to 0.339 over 20 epochs, and the network reaches a test accuracy of about 91.3% on the held-out test set.