使用Tensorflow构建和训练自己的CNN来做简单的验证码识别
??????? Tensorflow是目前最流行的深度學習框架,我們可以用它來搭建自己的卷積神經網絡并訓練自己的分類器,本文介紹怎樣使用Tensorflow構建自己的CNN,怎樣訓練用于簡單的驗證碼識別的分類器。本文假設你已經安裝好了Tensorflow,了解過CNN的一些知識。
下面將分步介紹怎樣獲得訓練數據,怎樣使用tensorflow構建卷積神經網絡,怎樣訓練,以及怎樣測試訓練出來的分類器
1. 準備訓練樣本
??????? 使用Python的庫captcha來生成我們需要的訓練樣本,代碼如下:
?
import sys
??? import os
??? import shutil
??? import random
??? import time
??? #captcha是用于生成驗證碼圖片的庫,可以 pip install captcha 來安裝它
??? from captcha.image import ImageCaptcha
??? ?
??? #用于生成驗證碼的字符集
??? CHAR_SET = ['0','1','2','3','4','5','6','7','8','9']
??? #字符集的長度
??? CHAR_SET_LEN = 10
??? #驗證碼的長度,每個驗證碼由4個數字組成
??? CAPTCHA_LEN = 4
??? ?
??? #驗證碼圖片的存放路徑
??? CAPTCHA_IMAGE_PATH = 'E:/Tensorflow/captcha/images/'
??? #用于模型測試的驗證碼圖片的存放路徑,它里面的驗證碼圖片作為測試集
??? TEST_IMAGE_PATH = 'E:/Tensorflow/captcha/test/'
??? #用于模型測試的驗證碼圖片的個數,從生成的驗證碼圖片中取出來放入測試集中
??? TEST_IMAGE_NUMBER = 50
??? ?
??? #生成驗證碼圖片,4位的十進制數字可以有10000種驗證碼
??? def generate_captcha_image(charSet = CHAR_SET, charSetLen=CHAR_SET_LEN, captchaImgPath=CAPTCHA_IMAGE_PATH):? ?
??????? k? = 0
??????? total = 1
??????? for i in range(CAPTCHA_LEN):
??????????? total *= charSetLen
?????????? ?
??????? for i in range(charSetLen):
??????????? for j in range(charSetLen):
??????????????? for m in range(charSetLen):
??????????????????? for n in range(charSetLen):
??????????????????????? captcha_text = charSet[i] + charSet[j] + charSet[m] + charSet[n]
??????????????????????? image = ImageCaptcha()
??????????????????????? image.write(captcha_text, captchaImgPath + captcha_text + '.jpg')
??????????????????????? k += 1
??????????????????????? sys.stdout.write("\rCreating %d/%d" % (k, total))
??????????????????????? sys.stdout.flush()
?????????????????????? ?
??? #從驗證碼的圖片集中取出一部分作為測試集,這些圖片不參加訓練,只用于模型的測試?????????????????? ?
??? def prepare_test_set():
??????? fileNameList = []?? ?
??????? for filePath in os.listdir(CAPTCHA_IMAGE_PATH):
??????????? captcha_name = filePath.split('/')[-1]
??????????? fileNameList.append(captcha_name)
??????? random.seed(time.time())
??????? random.shuffle(fileNameList)
??????? for i in range(TEST_IMAGE_NUMBER):
??????????? name = fileNameList[i]
??????????? shutil.move(CAPTCHA_IMAGE_PATH + name, TEST_IMAGE_PATH + name)
?????????????????????????? ?
??? if __name__ == '__main__':
??????? generate_captcha_image(CHAR_SET, CHAR_SET_LEN, CAPTCHA_IMAGE_PATH)
??????? prepare_test_set()
??????? sys.stdout.write("\nFinished")
??????? sys.stdout.flush() ?
?
運行上面的代碼,可以生成驗證碼圖片,
生成的驗證碼圖片如下圖所示:
?
2. 構建CNN,訓練分類器
???? 代碼如下:
?
??? import tensorflow as tf
??? import numpy as np
??? from PIL import Image
??? import os
??? import random
??? import time
??? ?
??? #驗證碼圖片的存放路徑
??? CAPTCHA_IMAGE_PATH = 'E:/Tensorflow/captcha/images/'
??? #驗證碼圖片的寬度
??? CAPTCHA_IMAGE_WIDHT = 160
??? #驗證碼圖片的高度
??? CAPTCHA_IMAGE_HEIGHT = 60
??? ?
??? CHAR_SET_LEN = 10
??? CAPTCHA_LEN = 4
??? ?
??? #60%的驗證碼圖片放入訓練集中
??? TRAIN_IMAGE_PERCENT = 0.6
??? #訓練集,用于訓練的驗證碼圖片的文件名
??? TRAINING_IMAGE_NAME = []
??? #驗證集,用于模型驗證的驗證碼圖片的文件名
VALIDATION_IMAGE_NAME = []
?
??? #存放訓練好的模型的路徑
??? MODEL_SAVE_PATH = 'E:/Tensorflow/captcha/models/'
??? ?
??? def get_image_file_name(imgPath=CAPTCHA_IMAGE_PATH):
??????? fileName = []
??????? total = 0
??????? for filePath in os.listdir(imgPath):
??????????? captcha_name = filePath.split('/')[-1]
??????????? fileName.append(captcha_name)
??????????? total += 1
??????? return fileName, total
?????? ?
??? #將驗證碼轉換為訓練時用的標簽向量,維數是 40? ?
??? #例如,如果驗證碼是 ‘0296’ ,則對應的標簽是
??? # [1 0 0 0 0 0 0 0 0 0
??? #? 0 0 1 0 0 0 0 0 0 0
??? #? 0 0 0 0 0 0 0 0 0 1
??? #? 0 0 0 0 0 0 1 0 0 0]
??? def name2label(name):
??????? label = np.zeros(CAPTCHA_LEN * CHAR_SET_LEN)
??????? for i, c in enumerate(name):
??????????? idx = i*CHAR_SET_LEN + ord(c) - ord('0')
??????????? label[idx] = 1
??????? return label
?????? ?
??? #取得驗證碼圖片的數據以及它的標簽?????? ?
??? def get_data_and_label(fileName, filePath=CAPTCHA_IMAGE_PATH):
??????? pathName = os.path.join(filePath, fileName)
??????? img = Image.open(pathName)
??????? #轉為灰度圖
??????? img = img.convert("L")????? ?
??????? image_array = np.array(img)?? ?
??????? image_data = image_array.flatten()/255
??????? image_label = name2label(fileName[0:CAPTCHA_LEN])
??????? return image_data, image_label
?????? ?
??? #生成一個訓練batch?? ?
??? def get_next_batch(batchSize=32, trainOrTest='train', step=0):
??????? batch_data = np.zeros([batchSize, CAPTCHA_IMAGE_WIDHT*CAPTCHA_IMAGE_HEIGHT])
??????? batch_label = np.zeros([batchSize, CAPTCHA_LEN * CHAR_SET_LEN])
??????? fileNameList = TRAINING_IMAGE_NAME
??????? if trainOrTest == 'validate':?????? ?
??????????? fileNameList = VALIDATION_IMAGE_NAME
?????????? ?
??????? totalNumber = len(fileNameList)
??????? indexStart = step*batchSize?? ?
??????? for i in range(batchSize):
??????????? index = (i + indexStart) % totalNumber
??????????? name = fileNameList[index]?????? ?
??????????? img_data, img_label = get_data_and_label(name)
??????????? batch_data[i, : ] = img_data
??????????? batch_label[i, : ] = img_label ?
??? ?
??????? return batch_data, batch_label
?????? ?
??? #構建卷積神經網絡并訓練
??? def train_data_with_CNN():
??????? #初始化權值
??????? def weight_variable(shape, name='weight'):
??????????? init = tf.truncated_normal(shape, stddev=0.1)
??????????? var = tf.Variable(initial_value=init, name=name)
??????????? return var
??????? #初始化偏置?? ?
??????? def bias_variable(shape, name='bias'):
??????????? init = tf.constant(0.1, shape=shape)
??????????? var = tf.Variable(init, name=name)
??????????? return var
??????? #卷積?? ?
??????? def conv2d(x, W, name='conv2d'):
??????????? return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME', name=name)
??????? #池化
??????? def max_pool_2X2(x, name='maxpool'):
??????????? return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME', name=name)??? ?
????? ?
??????? #輸入層
??????? #請注意 X 的 name,在測試model時會用到它
??????? X = tf.placeholder(tf.float32, [None, CAPTCHA_IMAGE_WIDHT * CAPTCHA_IMAGE_HEIGHT], name='data-input')
??????? Y = tf.placeholder(tf.float32, [None, CAPTCHA_LEN * CHAR_SET_LEN], name='label-input')?? ?
??????? x_input = tf.reshape(X, [-1, CAPTCHA_IMAGE_HEIGHT, CAPTCHA_IMAGE_WIDHT, 1], name='x-input')
??????? #dropout,防止過擬合
??????? #請注意 keep_prob 的 name,在測試model時會用到它
??????? keep_prob = tf.placeholder(tf.float32, name='keep-prob')
??????? #第一層卷積
??????? W_conv1 = weight_variable([5,5,1,32], 'W_conv1')
??????? B_conv1 = bias_variable([32], 'B_conv1')
??????? conv1 = tf.nn.relu(conv2d(x_input, W_conv1, 'conv1') + B_conv1)
??????? conv1 = max_pool_2X2(conv1, 'conv1-pool')
??????? conv1 = tf.nn.dropout(conv1, keep_prob)
??????? #第二層卷積
??????? W_conv2 = weight_variable([5,5,32,64], 'W_conv2')
??????? B_conv2 = bias_variable([64], 'B_conv2')
??????? conv2 = tf.nn.relu(conv2d(conv1, W_conv2,'conv2') + B_conv2)
??????? conv2 = max_pool_2X2(conv2, 'conv2-pool')
??????? conv2 = tf.nn.dropout(conv2, keep_prob)
??????? #第三層卷積
??????? W_conv3 = weight_variable([5,5,64,64], 'W_conv3')
??????? B_conv3 = bias_variable([64], 'B_conv3')
??????? conv3 = tf.nn.relu(conv2d(conv2, W_conv3, 'conv3') + B_conv3)
??????? conv3 = max_pool_2X2(conv3, 'conv3-pool')
??????? conv3 = tf.nn.dropout(conv3, keep_prob)
??????? #全鏈接層
??????? #每次池化后,圖片的寬度和高度均縮小為原來的一半,進過上面的三次池化,寬度和高度均縮小8倍
??????? W_fc1 = weight_variable([20*8*64, 1024], 'W_fc1')
??????? B_fc1 = bias_variable([1024], 'B_fc1')
??????? fc1 = tf.reshape(conv3, [-1, 20*8*64])
??????? fc1 = tf.nn.relu(tf.add(tf.matmul(fc1, W_fc1), B_fc1))
??????? fc1 = tf.nn.dropout(fc1, keep_prob)
??????? #輸出層
??????? W_fc2 = weight_variable([1024, CAPTCHA_LEN * CHAR_SET_LEN], 'W_fc2')
??????? B_fc2 = bias_variable([CAPTCHA_LEN * CHAR_SET_LEN], 'B_fc2')
??????? output = tf.add(tf.matmul(fc1, W_fc2), B_fc2, 'output')
?????? ?
??????? loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=output))
??????? optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)
?????? ?
??????? predict = tf.reshape(output, [-1, CAPTCHA_LEN, CHAR_SET_LEN], name='predict')
??????? labels = tf.reshape(Y, [-1, CAPTCHA_LEN, CHAR_SET_LEN], name='labels')
??????? #預測結果
??????? #請注意 predict_max_idx 的 name,在測試model時會用到它
??????? predict_max_idx = tf.argmax(predict, axis=2, name='predict_max_idx')
??????? labels_max_idx = tf.argmax(labels, axis=2, name='labels_max_idx')
??????? predict_correct_vec = tf.equal(predict_max_idx, labels_max_idx)
??????? accuracy = tf.reduce_mean(tf.cast(predict_correct_vec, tf.float32))
?????? ?
??????? saver = tf.train.Saver()
??????? with tf.Session() as sess:
??????????? sess.run(tf.global_variables_initializer())
??????????? steps = 0
??????????? for epoch in range(6000):
??????????????? train_data, train_label = get_next_batch(64, 'train', steps)
??????????????? sess.run(optimizer, feed_dict={X : train_data, Y : train_label, keep_prob:0.75})
??????????????? if steps % 100 == 0:
??????????????????? test_data, test_label = get_next_batch(100, 'validate', steps)
??????????????????? acc = sess.run(accuracy, feed_dict={X : test_data, Y : test_label, keep_prob:1.0})
??????????????????? print("steps=%d, accuracy=%f" % (steps, acc))
??????????????????? if acc > 0.99:
??????????????????????? saver.save(sess, MODEL_SAVE_PATH+"crack_captcha.model", global_step=steps)
??????????????????????? break
??????????????? steps += 1
??? ?
??? if __name__ == '__main__':?? ?
??????? image_filename_list, total = get_image_file_name(CAPTCHA_IMAGE_PATH)
??????? random.seed(time.time())
??????? #打亂順序
??????? random.shuffle(image_filename_list)
??????? trainImageNumber = int(total * TRAIN_IMAGE_PERCENT)
??????? #分成測試集
??????? TRAINING_IMAGE_NAME = image_filename_list[ : trainImageNumber]
??????? #和驗證集
??????? VALIDATION_IMAGE_NAME = image_filename_list[trainImageNumber : ]
??????? train_data_with_CNN()?? ?
??????? print('Training finished')
運行上面的代碼,開始訓練,訓練要花些時間,如果沒有GPU的話,會慢些,
訓練完后,輸出如下結果,經過4100次的迭代,訓練出來的分類器模型在驗證集上識別的準確率為99.5%
生成的模型文件如下,在模型測試時將用到這些文件
?
3. 測試模型
編寫代碼,對訓練出來的模型進行測試
?
import tensorflow as tf
??? import numpy as np
??? from PIL import Image
??? import os
??? import matplotlib.pyplot as plt
??? ?
??? CAPTCHA_LEN = 4
??? ?
??? MODEL_SAVE_PATH = 'E:/Tensorflow/captcha/models/'
??? TEST_IMAGE_PATH = 'E:/Tensorflow/captcha/test/'
??? ?
??? def get_image_data_and_name(fileName, filePath=TEST_IMAGE_PATH):
??????? pathName = os.path.join(filePath, fileName)
??????? img = Image.open(pathName)
??????? #轉為灰度圖
??????? img = img.convert("L")????? ?
??????? image_array = np.array(img)?? ?
??????? image_data = image_array.flatten()/255
??????? image_name = fileName[0:CAPTCHA_LEN]
??????? return image_data, image_name
??? ?
??? def digitalStr2Array(digitalStr):
??????? digitalList = []
??????? for c in digitalStr:
??????????? digitalList.append(ord(c) - ord('0'))
??????? return np.array(digitalList)
??? ?
??? def model_test():
??????? nameList = []
??????? for pathName in os.listdir(TEST_IMAGE_PATH):
??????????? nameList.append(pathName.split('/')[-1])
??????? totalNumber = len(nameList)
??????? #加載graph
??????? saver = tf.train.import_meta_graph(MODEL_SAVE_PATH+"crack_captcha.model-4100.meta")
??????? graph = tf.get_default_graph()
??????? #從graph取得 tensor,他們的name是在構建graph時定義的(查看上面第2步里的代碼)
??????? input_holder = graph.get_tensor_by_name("data-input:0")
??????? keep_prob_holder = graph.get_tensor_by_name("keep-prob:0")
??????? predict_max_idx = graph.get_tensor_by_name("predict_max_idx:0")
??????? with tf.Session() as sess:
??????????? saver.restore(sess, tf.train.latest_checkpoint(MODEL_SAVE_PATH))
??????????? count = 0
??????????? for fileName in nameList:
??????????????? img_data, img_name = get_image_data_and_name(fileName, TEST_IMAGE_PATH)
??????????????? predict = sess.run(predict_max_idx, feed_dict={input_holder:[img_data], keep_prob_holder : 1.0})?????????? ?
??????????????? filePathName = TEST_IMAGE_PATH + fileName
??????????????? print(filePathName)
??????????????? img = Image.open(filePathName)
??????????????? plt.imshow(img)
??????????????? plt.axis('off')
??????????????? plt.show()
??????????????? predictValue = np.squeeze(predict)
??????????????? rightValue = digitalStr2Array(img_name)
??????????????? if np.array_equal(predictValue, rightValue):
??????????????????? result = '正確'
??????????????????? count += 1
??????????????? else:
??????????????????? result = '錯誤'?????????? ?
??????????????? print('實際值:{}, 預測值:{},測試結果:{}'.format(rightValue, predictValue, result))
??????????????? print('\n')
?????????????? ?
??????????? print('正確率:%.2f%%(%d/%d)' % (count*100/totalNumber, count, totalNumber))
??? ?
??? if __name__ == '__main__':
??????? model_test()
?
?
對模型的測試結果如下,在測試集上識別的準確率為 94%
下面是兩個識別錯誤的驗證碼
訓練出的模型放在了下面的云盤里,有興趣的同學可以用它做下驗證碼的識別
https://pan.baidu.com/s/1hsfCA6S
總結
以上是生活随笔為你收集整理的使用Tensorflow构建和训练自己的CNN来做简单的验证码识别的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 写给初学者的深度学习教程之 MNIST
- 下一篇: Discuz验证码识别(上线篇)-写给程