【人工智能项目】深度学习实现汉字书法识别
生活随笔
收集整理的這篇文章主要介紹了
【人工智能项目】深度学习实现汉字书法识别
小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
【人工智能項目】深度學習實現漢字書法識別
背景介紹
競賽數據提供100個漢字書法單字,包括碑帖、手寫書法、古漢字等。圖片全部為單通道灰度jpg,寬高不定。
數(shù)據(jù)集介紹
-
訓練集:每個漢字400張圖片,共計40000張圖片。訓練集是標注好的數據,圖片按照圖片上的文字分類到不同的文件夾中,也就是說,文件夾的名字就是文件夾里面所有圖片的標簽。
-
測試集:
- 第一部分:每漢字100張圖片共計10000張圖片,供參賽人員測試算法模型
- 第二部分:每漢字50張以上圖片共1643張圖片,用來評測。
-
提交csv文件
- 文件第一列是測試集中圖片文件的文件名,第二列是推斷出來的圖片上文字可能的五個漢字。
思路
- 提取數(shù)據(jù)集
- 定義fine-tune模型
- VGG16模型
- ResNet50模型
- Xception模型
- InceptionV3模型
具體流程
提取數(shù)據(jù)集
# Import the modules needed for data loading and label encoding.
import os

import cv2
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

# Fit the label encoder on the training-set folder names: each sub-folder of
# train_path is named after the single character its images contain, so the
# sorted folder list is the class vocabulary.
train_path = "/content/train"
label_char = os.listdir(train_path)
label_char.sort()
encode_label = LabelBinarizer()
encode_label.fit(label_char)
# Define the image-reading function.
def get_img(file_path, img_rows, img_cols):
    """Return an (img_cols, img_rows, 3) uint8 array for the image at file_path.

    The image is loaded as single-channel grayscale, expanded to 3-channel
    RGB (so ImageNet-pretrained backbones accept it), then resized.
    """
    image = cv2.imread(file_path, 0)  # 0 -> read as grayscale
    image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    image = cv2.resize(image, (img_rows, img_cols))
    return np.array(image, dtype=np.uint8)


# Define the training-set loader.
def load_train_data(train_path, img_rows, img_cols):
    """Load every labelled training image and return a shuffled 80/20 split.

    Returns (x_train, x_valid, y_train, y_valid); labels are one-hot encoded
    with the module-level `encode_label` LabelBinarizer.
    """
    x_train = []
    y_train = []
    for dir_name in os.listdir(train_path):
        path = train_path + "/" + dir_name
        for img_name in os.listdir(path):
            x_train.append(get_img(os.path.join(path, img_name), img_rows, img_cols))
            y_train.append(dir_name)  # the folder name is the label
    # One-hot encode y_train.
    y_train = np.array(encode_label.transform(y_train), dtype=np.uint8)
    # Stack x_train into a single uint8 array.
    x_train = np.array(x_train, dtype=np.uint8)
    # Shuffle and split into train/validation sets (fixed seed for reproducibility).
    x_train, x_valid, y_train, y_valid = train_test_split(
        x_train, y_train, test_size=0.2, random_state=2019)
    return x_train, x_valid, y_train, y_valid


# Define the test-set loader.
def load_test_data(test_path, img_rows, img_cols):
    """Load unlabelled test images; return (images, file-name ids)."""
    x_test_id = []
    x_test = []
    for img_name in os.listdir(test_path):
        x_test.append(get_img(os.path.join(test_path, img_name), img_rows, img_cols))
        # The file name doubles as the sample id for the submission file.
        # (The original bound it to a local named `id`, shadowing the builtin.)
        x_test_id.append(img_name)
    x_test = np.array(x_test, dtype=np.uint8)
    return x_test, x_test_id


# Load train/validation data and labels.
img_rows, img_cols = 224, 224
x_train, x_valid, y_train, y_valid = load_train_data(train_path, img_rows, img_cols)

# Load the data to be predicted, plus its ids.
test_path = "/content/test2"
x_test, x_test_id = load_test_data(test_path, img_rows, img_cols)

# Sanity-check array shapes and a few test ids.
print(x_train.shape)
print(y_train.shape)
print(x_valid.shape)
print(y_valid.shape)
print(x_test.shape)
print(x_test_id[:5])

# Preview one sample with its decoded label.
import matplotlib.pyplot as plt
# %matplotlib inline  (IPython notebook magic in the original; not valid plain Python)
print(label_char[y_train[0].argmax()])
plt.imshow(x_train[0])

# --- fine-tune model ---
選擇keras中預訓練好的模型,進行fine-tune。
# Import the libraries needed for model building and training.
from keras import optimizers, Input
from keras.applications import imagenet_utils
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.callbacks import *
from keras.applications import *
from sklearn.preprocessing import *
from sklearn.model_selection import *
from sklearn.metrics import *

# Plot the loss and accuracy curves recorded during training.
import matplotlib.pyplot as plt
# %matplotlib inline  (IPython notebook magic in the original; not valid plain Python)


def history_plot(history_fit):
    """Plot accuracy (left) and loss (right) curves for train and validation sets."""
    plt.figure(figsize=(12, 6))
    # summarize history for accuracy
    plt.subplot(121)
    plt.plot(history_fit.history["acc"])
    plt.plot(history_fit.history["val_acc"])
    plt.title("model accuracy")
    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    plt.legend(["train", "valid"], loc="upper left")
    # summarize history for loss
    plt.subplot(122)
    plt.plot(history_fit.history["loss"])
    plt.plot(history_fit.history["val_loss"])
    plt.title("model loss")
    plt.ylabel("loss")
    plt.xlabel("epoch")
    plt.legend(["train", "test"], loc="upper left")
    plt.show()


# fine-tune the model
def fine_tune_model(model, optimizer, batch_size, epochs, freeze_num):
    """Two-stage fine-tune of a pre-trained model; best weights saved as <name>.hdf5.

    model: the model to tune (VGG16, ResNet50, ...)
    optimizer: optimizer for the fine-tune-all-layers stage (stage 1 uses adadelta)
    batch_size: suggested 32/64/128
    epochs: number of epochs for the fine-tune-all-layers stage
    freeze_num: number of leading (convolutional) layers frozen during stage 1

    NOTE(review): trains on the module-level x_train/y_train/x_valid/y_valid
    globals rather than taking them as parameters.
    """
    # Stage 1: train only the randomly-initialised dense head,
    # with the first freeze_num layers frozen.
    for layer in model.layers[:freeze_num]:
        layer.trainable = False
    model.compile(optimizer='adadelta', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(x=x_train, y=y_train, batch_size=batch_size, epochs=2,
              shuffle=True, verbose=1, validation_data=(x_valid, y_valid))
    print('Finish step_1')

    # Stage 2: unfreeze everything and fine-tune all layers.
    for layer in model.layers[:]:
        layer.trainable = True
    # Halve the learning rate when val_loss plateaus for 2 epochs.
    rc = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2,
                           verbose=1, min_delta=1e-4, mode='min')
    # Checkpoint only the best (lowest val_loss) weights to <model.name>.hdf5.
    model_name = model.name + '.hdf5'
    mc = ModelCheckpoint(model_name, monitor='val_loss', save_best_only=True,
                         mode='min')
    model.compile(optimizer=optimizer, loss='categorical_crossentropy',
                  metrics=['accuracy'])
    history_fit = model.fit(x=x_train, y=y_train, batch_size=batch_size,
                            epochs=epochs, shuffle=True, verbose=1,
                            validation_data=(x_valid, y_valid),
                            callbacks=[mc, rc])
    print('Finish fine-tune')
    # Show the loss/accuracy curves from the fine-tune stage.
    history_plot(history_fit)

# --- VGG16 ---
a.定義VGG16模型
# Build a VGG16-based classifier for the 100 calligraphy characters.
def vgg16_model(img_rows, img_cols):
    """VGG16 backbone (ImageNet weights, global-avg-pooled) with a
    1024-unit ReLU head, dropout, and a 100-way softmax output."""
    inp = Input(shape=(img_rows, img_cols, 3))
    preprocessed = Lambda(imagenet_utils.preprocess_input)(inp)
    base_model = VGG16(input_tensor=preprocessed, weights='imagenet',
                       include_top=False, pooling='avg')
    head = Dense(1024, activation='relu', name='fc1')(base_model.output)
    head = Dropout(0.5)(head)
    predictions = Dense(100, activation='softmax', name='predictions')(head)
    return Model(inputs=base_model.input, outputs=predictions, name='vgg16')


# Instantiate the model, then list every layer's index and name
# (used to choose freeze_num for fine-tuning).
img_rows, img_cols = 224, 224
vgg_model = vgg16_model(img_rows, img_cols)
for i, layer in enumerate(vgg_model.layers):
    print(i, layer.name)
b.VGG16模型訓練
c.VGG16模型預測
ResNet
a.ResNet50模型定義
# Build a ResNet50-based classifier for the 100 calligraphy characters.
# (The function name's "restnet" misspelling is kept — it is the public name.)
def restnet50_model(img_rows, img_cols):
    """ResNet50 backbone (ImageNet weights, global-avg-pooled) with a
    1024-unit ReLU head, dropout, and a 100-way softmax output."""
    inp = Input(shape=(img_rows, img_cols, 3))
    preprocessed = Lambda(imagenet_utils.preprocess_input)(inp)
    base_model = ResNet50(input_tensor=preprocessed, weights='imagenet',
                          include_top=False, pooling='avg')
    head = Dense(1024, activation='relu', name='fc1')(base_model.output)
    head = Dropout(0.5)(head)
    predictions = Dense(100, activation='softmax', name='predictions')(head)
    return Model(inputs=base_model.input, outputs=predictions, name='resnet50')


# Instantiate the model, then list every layer's index and name
# (used to choose freeze_num for fine-tuning).
img_rows, img_cols = 224, 224
resnet_model = restnet50_model(img_rows, img_cols)
for i, layer in enumerate(resnet_model.layers):
    print(i, layer.name)

# --- b. ResNet50 model training ---
# Train the ResNet50 model: Adamax optimizer for the all-layers stage,
# first 175 layers frozen during the head-only stage.
optimizer = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
batch_size = 32
epochs = 10
freeze_num = 175
fine_tune_model(resnet_model, optimizer, batch_size, epochs, freeze_num)
c.ResNet模型預測
Xception
a.定義Xception模型
# Build an Xception-based classifier for the 100 calligraphy characters.
def xception_model(img_rows, img_cols):
    """Xception backbone (ImageNet weights, global-avg-pooled) with a
    1024-unit ReLU head, dropout, and a 100-way softmax output."""
    inp = Input(shape=(img_rows, img_cols, 3))
    preprocessed = Lambda(imagenet_utils.preprocess_input)(inp)
    base_model = Xception(input_tensor=preprocessed, weights='imagenet',
                          include_top=False, pooling='avg')
    head = Dense(1024, activation='relu', name='fc1')(base_model.output)
    head = Dropout(0.5)(head)
    predictions = Dense(100, activation='softmax', name='predictions')(head)
    return Model(inputs=base_model.input, outputs=predictions, name='xception')


# Instantiate the model, then list every layer's index and name
# (used to choose freeze_num for fine-tuning).
# NOTE: this rebinding shadows the factory function `xception_model` with the
# built model instance, exactly as the original code does.
img_rows, img_cols = 224, 224
xception_model = xception_model(img_rows, img_cols)
for i, layer in enumerate(xception_model.layers):
    print(i, layer.name)

# --- b. Xception model training ---
# Train the Xception model: Adamax optimizer for the all-layers stage,
# first 132 layers frozen during the head-only stage.
optimizer = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
batch_size = 32
epochs = 15
freeze_num = 132
fine_tune_model(xception_model, optimizer, batch_size, epochs, freeze_num)

# --- c. Xception model prediction ---
# Load the best Xception weights saved by fine_tune_model, then predict
# class probabilities for the whole test set.
xception_model.load_weights("xception.hdf5")
y_preds = xception_model.predict(x_test)

from pandas import DataFrame


# Build the submission csv file.
def create_submission(y_preds, x_test_id):
    """Write submit3.csv with one row per test image.

    y_preds: (n_samples, 100) array of per-class probabilities
    x_test_id: list of test-image file names, aligned with y_preds rows

    Columns: "filename" (the image file name) and "labels" (the five most
    probable characters, concatenated, most probable first).
    """
    labels = []
    for i, _ in enumerate(x_test_id):
        # key: index of the candidate character, value: its predicted probability
        pred_dict = {key: value for key, value in enumerate(y_preds[i])}
        # Sort descending by probability; pred_sorted: [(k1, v1), (k2, v2), ...]
        pred_sorted = sorted(pred_dict.items(), key=lambda item: (-item[1]))
        top_5 = ''
        for j in range(5):
            index = pred_sorted[j][0]
            top_5 += label_char[index]  # decode class index -> character
        labels.append(top_5)
    result = DataFrame(labels, columns=["labels"])
    result.insert(0, "filename", x_test_id)
    result.to_csv("submit3.csv", index=None)
    print("create submission succesfuly")


# Generate the csv file.
create_submission(y_preds, x_test_id)

import pandas as pd

# Preview the submission file.
# BUG FIX: the original read "submit.csv", but create_submission writes
# "submit3.csv" — preview the file that was actually produced.
predict_df = pd.read_csv("submit3.csv")
predict_df.head()

# --- InceptionV3 ---
a.InceptionV3模型定義
# Build an InceptionV3-based classifier for the 100 calligraphy characters.
def inception_model(img_rows, img_cols):
    """InceptionV3 backbone (ImageNet weights, global-avg-pooled) with a
    1024-unit ReLU head, dropout, and a 100-way softmax output."""
    inp = Input(shape=(img_rows, img_cols, 3))
    preprocessed = Lambda(imagenet_utils.preprocess_input)(inp)
    base_model = inception_v3.InceptionV3(input_tensor=preprocessed,
                                          weights='imagenet',
                                          include_top=False, pooling='avg')
    head = Dense(1024, activation='relu', name='fc1')(base_model.output)
    head = Dropout(0.5)(head)
    predictions = Dense(100, activation='softmax', name='predictions')(head)
    return Model(inputs=base_model.input, outputs=predictions, name='inceptionv3')


# Instantiate the model, then list every layer's index and name
# (used to choose freeze_num for fine-tuning).
# NOTE: this rebinding shadows the factory function `inception_model` with the
# built model instance, exactly as the original code does.
img_rows, img_cols = 224, 224
inception_model = inception_model(img_rows, img_cols)
for i, layer in enumerate(inception_model.layers):
    print(i, layer.name)

# --- b. InceptionV3 model training ---
# Train the InceptionV3 model: Adamax optimizer for the all-layers stage,
# first 311 layers frozen during the head-only stage.
optimizer = optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
batch_size = 32
epochs = 15
freeze_num = 311
fine_tune_model(inception_model, optimizer, batch_size, epochs, freeze_num)
c.InceptionV3模型預測
小結
那接著下一個安排走起吧!!!
總結
以上是生活随笔為你收集整理的【人工智能项目】深度学习实现汉字书法识别的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: vulhub漏洞复现37_Liferay
- 下一篇: APP全局色彩饱和度修改