Unet项目解析(4): ./src/RetinaNN_predict.py
項目GitHub主頁:https://github.com/orobix/retina-unet
參考論文:Retina blood vessel segmentation with a convolution neural network (U-net)
1.導入相關模塊
#Python
import numpy as np
import configparser
from matplotlib import pyplot as plt
#Keras
from keras.models import model_from_json
導入sklearn模塊,關于sklearn模塊的詳細說明可以參考fuqiuai的博客,也可以參考官網的使用說明。
sklearn.metrics提供了一些函數,用來計算真實值與預測值之間的預測誤差;這里用到的評價指標主要有如下幾個:
#scikit learn
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import jaccard_similarity_score
from sklearn.metrics import f1_score
導入依賴的處理腳本文件。
import sys
sys.path.insert(0, '/home/shenziehng/anaconda/SpyderProject/Retina_NN/lib/')
# help_functions.py
from help_functions import *
# extract_patches.py
from extract_patches import recompone
from extract_patches import recompone_overlap
from extract_patches import paint_border
from extract_patches import kill_border
from extract_patches import pred_only_FOV
from extract_patches import get_data_testing
from extract_patches import get_data_testing_overlap
# pre_processing.py
from pre_processing import my_PreProc
2. 加載配置文件,解析參數
# Load the configuration file and resolve every setting the prediction run needs.
config = configparser.RawConfigParser()
config.read('/home/shenziheng/SpyderProject/Retina_NN/configuration.txt')
path_data = config.get('data paths', 'path_local')  # data directory

# Packed file holding the original test images.
DRIVE_test_imgs_original = path_data + config.get('data paths', 'test_imgs_original')
test_imgs_orig = load_hdf5(DRIVE_test_imgs_original)  # test images
full_img_height = test_imgs_orig.shape[2]
full_img_width = test_imgs_orig.shape[3]

# Packed file holding the border masks of the test images.
DRIVE_test_border_masks = path_data + config.get('data paths', 'test_border_masks')
test_border_masks = load_hdf5(DRIVE_test_border_masks)

# Dimensions of a single image patch.
patch_height = int(config.get('data attributes', 'patch_height'))
patch_width = int(config.get('data attributes', 'patch_width'))

# Stride used when sliding over the image to cut patches.
stride_height = int(config.get('testing settings', 'stride_height'))
stride_width = int(config.get('testing settings', 'stride_width'))
assert (stride_height < patch_height and stride_width < patch_width)

name_experiment = config.get('experiment name', 'name')
path_experiment = './' + name_experiment + '/'

# Number of full images to predict (the article predicts all 20).
Imgs_to_test = int(config.get('testing settings', 'full_images_to_test'))
# Images per visualisation group (the article uses 1).
N_visual = int(config.get('testing settings', 'N_group_visual'))
# Whether overlapping patches are averaged (the article uses True).
average_mode = config.getboolean('testing settings', 'average_mode')

# Packed file holding the ground truth of the test images.
gtruth = path_data + config.get('data paths', 'test_groundTruth')
img_truth = load_hdf5(gtruth)

# Show all test images, all border masks and all ground-truth images.
visualize(group_images(test_imgs_orig[0:20, :, :, :], 5), 'original').show()
visualize(group_images(test_border_masks[0:20, :, :, :], 5), 'borders').show()
visualize(group_images(img_truth[0:20, :, :, :], 5), 'gtruth').show()

# 3. Patch extraction and prediction
# Cut the test images into patches, either overlapping (average_mode) or tiled.
patches_imgs_test = None
masks_test = None
patches_masks_test = None
new_height = None
new_width = None
if average_mode:
    patches_imgs_test, new_height, new_width, masks_test = get_data_testing_overlap(
        DRIVE_test_imgs_original=DRIVE_test_imgs_original,  # original
        DRIVE_test_groudTruth=path_data + config.get('data paths', 'test_groundTruth'),  # masks
        Imgs_to_test=int(config.get('testing settings', 'full_images_to_test')),
        patch_height=patch_height,
        patch_width=patch_width,
        stride_height=stride_height,
        stride_width=stride_width,
    )
else:
    patches_imgs_test, patches_masks_test = get_data_testing(
        DRIVE_test_imgs_original=DRIVE_test_imgs_original,  # original
        DRIVE_test_groudTruth=path_data + config.get('data paths', 'test_groundTruth'),  # masks
        Imgs_to_test=int(config.get('testing settings', 'full_images_to_test')),
        patch_height=patch_height,
        patch_width=patch_width,
    )
# The first branch obtains patches with the overlapping method; the second is
# the simple side-by-side tiling method.
# ================ Run the prediction of the patches ==================================
best_last = config.get('testing settings', 'best_last')

# Load the already-trained model and its weights. The architecture file is
# read inside a context manager so the handle is closed right after parsing.
with open(path_experiment + name_experiment + '_architecture.json') as architecture_file:
    model = model_from_json(architecture_file.read())
model.load_weights(path_experiment + name_experiment + '_' + best_last + '_weights.h5')

# Run the model on the patches; verbose=1 shows a progress bar.
predictions = model.predict(patches_imgs_test, batch_size=32, verbose=1)
print("predicted images size :")
print(predictions.shape)

# ===== Convert the prediction arrays in corresponding images
pred_patches = pred_to_imgs(predictions, patch_height, patch_width, "original")

# NOTE: pred_to_imgs is a very important helper here. The article's author
# plans a separate post covering patch extraction, recomposition, restoring
# predictions to images, and visualisation.
# ========== Rebuild full-size images from the patches and visualise them ==========
# NOTE(review): the article's listing uses pred_imgs, orig_imgs and
# gtruth_masks without ever defining them. The recomposition step below is
# reconstructed from the helpers this file imports (recompone_overlap,
# recompone, my_PreProc) and the values produced during patch extraction
# (new_height, new_width, masks_test). Confirm against the upstream project.
if average_mode:
    pred_imgs = recompone_overlap(
        pred_patches, new_height, new_width, stride_height, stride_width
    )  # predictions
    orig_imgs = my_PreProc(test_imgs_orig[0:pred_imgs.shape[0], :, :, :])  # originals
    gtruth_masks = masks_test  # ground-truth masks
else:
    # NOTE(review): 13 x 12 is presumably the patch grid for the padded test
    # images; confirm it matches patch_height / patch_width in the config.
    pred_imgs = recompone(pred_patches, 13, 12)  # predictions
    orig_imgs = recompone(patches_imgs_test, 13, 12)  # originals
    gtruth_masks = recompone(patches_masks_test, 13, 12)  # masks

# Zero out the predicted data that lies outside the border mask.
kill_border(pred_imgs, test_border_masks)

# Back to original dimensions.
orig_imgs = orig_imgs[:, :, 0:full_img_height, 0:full_img_width]
pred_imgs = pred_imgs[:, :, 0:full_img_height, 0:full_img_width]
gtruth_masks = gtruth_masks[:, :, 0:full_img_height, 0:full_img_width]
print("Orig imgs shape: " + str(orig_imgs.shape))
print("pred imgs shape: " + str(pred_imgs.shape))
print("Gtruth imgs shape: " + str(gtruth_masks.shape))

# Visualise the results: compare the prediction with the ground truth.
assert (orig_imgs.shape[0] == pred_imgs.shape[0] and orig_imgs.shape[0] == gtruth_masks.shape[0])
N_predicted = orig_imgs.shape[0]
group = N_visual
assert (N_predicted % group == 0)
for i in range(int(N_predicted / group)):
    first = i * group
    last = first + group
    orig_stripe = group_images(orig_imgs[first:last, :, :, :], group)
    masks_stripe = group_images(gtruth_masks[first:last, :, :, :], group)
    pred_stripe = group_images(pred_imgs[first:last, :, :, :], group)
    # Stack original / ground truth / prediction along axis 0.
    total_img = np.concatenate((orig_stripe, masks_stripe, pred_stripe), axis=0)
    visualize(
        total_img,
        path_experiment + name_experiment + "_Original_GroundTruth_Prediction" + str(i),
    ).show()

# 4. Evaluating the deep model
作者主要用了sklearn模塊中的模型評價函數,即 sklearn.metrics。
?
- sklearn.metrics.roc_curve : 受試者工作曲線/準確性評價
計算受試者工作特性曲線Receiver Operating Characteristic, ROC。只能應用于二分類問題。
ROC曲線指受試者工作特征曲線/接收器操作特性(receiver operating characteristic,ROC)曲線,是反映靈敏性和特異性連續變量的綜合指標,是用構圖法揭示敏感性和特異性的相互關系,它通過將連續變量設定出多個不同的臨界值,從而計算出一系列敏感性和特異性。ROC曲線是根據一系列不同的二分類方式(分界值或決定閾),以真正例率(也就是靈敏度)(True Positive Rate,TPR)為縱坐標,假正例率(1-特異性)(False Positive Rate,FPR)為橫坐標繪制的曲線。
ROC觀察模型正確地識別正例的比例與模型錯誤地把負例數據識別成正例的比例之間的權衡。TPR的增加以FPR的增加為代價。ROC曲線下的面積(Area Under the ROC Curve,AUC)是衡量模型區分能力的指標。
縱坐標:真正率(True Positive Rate , TPR)或靈敏度(sensitivity):TPR = TP /(TP + FN) (被正確預測為正的正樣本數 / 正樣本實際數)
橫坐標:假正率(False Positive Rate , FPR):FPR = FP /(FP + TN) (被預測為正的負樣本結果數 /負樣本實際數)
該函數返回這三個變量:fpr,tpr,和閾值thresholds; 這里理解thresholds: 分類器的一個重要功能“概率輸出”,即表示分類器認為某個樣本具有多大的概率屬于正樣本(或負樣本)。
Score表示每個測試樣本屬于正樣本的概率。接下來,從高到低,依次將Score值作為閾值threshold,當測試樣本屬于正樣本的概率大于或等于這個threshold時,我們認為它為正樣本,否則為負樣本。每次選取一個不同的threshold,我們就可以得到一組FPR和TPR,即ROC曲線上的一點。當我們將threshold設置為1和0時,分別可以得到ROC曲線上的(0,0)和(1,1)兩個點。將這些(FPR,TPR)對連接起來,就得到了ROC曲線。當threshold取值越多,ROC曲線越平滑。其實,我們并不一定要得到每個測試樣本是正樣本的概率值,只要得到這個分類器對該測試樣本的“評分值”即可(評分值并不一定在(0,1)區間)。評分越高,表示分類器越肯定地認為這個測試樣本是正樣本,而且同時使用各個評分值作為threshold。
# ====== Evaluate the results
print("\n\n======== Evaluate the results =======================")

# Only the pixels inside the FOV are evaluated.
y_scores, y_true = pred_only_FOV(pred_imgs, gtruth_masks, test_border_masks)
print("Calculating results only inside the FOV:")
print("y scores pixels: " + str(y_scores.shape[0])
      + " (radius 270: 270*270*3.14==228906), including background around retina: "
      + str(pred_imgs.shape[0] * pred_imgs.shape[2] * pred_imgs.shape[3])
      + " (584*565==329960)")
print("y true pixels: " + str(y_true.shape[0])
      + " (radius 270: 270*270*3.14==228906), including background around retina: "
      + str(gtruth_masks.shape[2] * gtruth_masks.shape[3] * gtruth_masks.shape[0])
      + " (584*565==329960)")

# Area under the ROC curve.
fpr, tpr, thresholds = roc_curve((y_true), y_scores)
AUC_ROC = roc_auc_score(y_true, y_scores)
# Alternative kept from the original listing: np.trapz(tpr, fpr) integrates
# the curve numerically with numpy.
print("\n Area under the ROC curve: " + str(AUC_ROC))

# The figure gets its own name so it does not shadow the imported
# roc_curve function.
roc_figure = plt.figure()
plt.plot(fpr, tpr, '-', label='Area Under the Curve (AUC = %0.4f)' % AUC_ROC)
plt.title('ROC curve')
plt.xlabel("FPR (False Positive Rate)")
plt.ylabel("TPR (True Positive Rate)")
plt.legend(loc="lower right")
plt.savefig(path_experiment + "ROC.png")
- sklearn.metrics.precision_recall_curve:精確度-召回率曲線
以推薦算法為例:
A:檢索到的,相關的(搜到的也想要的)
B:檢索到的,但是不相關的(搜到的但沒用的)
C:未檢索到的,但卻是相關的(沒搜到,然而實際上想要的)
D:未檢索到的,也不相關的(沒搜到也沒用的)
如果我們希望:被檢索到的內容越多越好,是追求“查全率”,即A/(A+C),越大越好。
如果我們希望:檢索到的文檔中,真正想要的、也就是相關的越多越好,不相關的越少越好,是追求“查準率”(精確率,precision),即A/(A+B),越大越好。
# Precision-recall curve
precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
# Reverse both arrays so they are increasing (you won't get negative AUC).
precision = np.fliplr([precision])[0]
recall = np.fliplr([recall])[0]
AUC_prec_rec = np.trapz(precision, recall)
# print() call form, consistent with every other print in this script.
print("\nArea under Precision-Recall curve: " + str(AUC_prec_rec))

prec_rec_curve = plt.figure()
plt.plot(recall, precision, '-', label='Area Under the Curve (AUC = %0.4f)' % AUC_prec_rec)
plt.title('Precision - Recall curve')
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.legend(loc="lower right")
plt.savefig(path_experiment + "Precision_recall.png")

# - sklearn.metrics.confusion_matrix: confusion matrix
混淆矩陣是對有監督學習分類算法準確率進行評估的工具。通過將模型預測的數據與測試數據進行對比,使用各種指標對模型的分類效果進行度量。
# Confusion matrix
threshold_confusion = 0.5
print("\nConfusion matrix: Costum threshold (for positive) of " + str(threshold_confusion))

# Binarise the scores in one vectorised step: 1 where the score reaches the
# threshold, 0 otherwise (float array, like the original np.empty buffer).
# NOTE(review): assumes y_scores is a 1-D array of per-pixel scores - confirm.
y_pred = (np.asarray(y_scores) >= threshold_confusion).astype(np.float64)
confusion = confusion_matrix(y_true, y_pred)
print(confusion)

# Each ratio defaults to 0 and is only computed when its denominator is non-zero.
accuracy = 0
if float(np.sum(confusion)) != 0:
    accuracy = float(confusion[0, 0] + confusion[1, 1]) / float(np.sum(confusion))
print("Global Accuracy: " + str(accuracy))

specificity = 0
if float(confusion[0, 0] + confusion[0, 1]) != 0:
    specificity = float(confusion[0, 0]) / float(confusion[0, 0] + confusion[0, 1])
print("Specificity: " + str(specificity))

sensitivity = 0
if float(confusion[1, 1] + confusion[1, 0]) != 0:
    sensitivity = float(confusion[1, 1]) / float(confusion[1, 1] + confusion[1, 0])
print("Sensitivity: " + str(sensitivity))

precision = 0
if float(confusion[1, 1] + confusion[0, 1]) != 0:
    precision = float(confusion[1, 1]) / float(confusion[1, 1] + confusion[0, 1])
print("Precision: " + str(precision))

# - sklearn.metrics.jaccard_similarity_score: Jaccard similarity
jaccard index又稱為jaccard similarity coefficient用于比較有限樣本集之間的相似性和差異性。定義:
給定兩個集合A,B jaccard 系數定義為A與B交集的大小與并集大小的比值。jaccard值越大說明相似度越高。
#Jaccard similarity index jaccard_index = jaccard_similarity_score(y_true, y_pred, normalize=True) print ("\nJaccard similarity score: " +str(jaccard_index))- sklearn.metrics.f1_score
F1-score:是精確率(precision)與召回率(recall)的調和平均,即 F1 = 2 · P · R / (P + R),可以認為是兩者的綜合效果。
# F1 score of the thresholded prediction against the ground truth
# (binary averaging, no explicit label set, no sample weights).
F1_score = f1_score(
    y_true,
    y_pred,
    labels=None,
    average='binary',
    sample_weight=None,
)
f1_report = "\nF1 score (F-measure): " + str(F1_score)
print(f1_report)
最后保存數據結果。
# Save the results: write every metric computed above to performances.txt in
# the experiment directory. The context manager closes the file even if the
# write raises.
with open(path_experiment + 'performances.txt', 'w') as file_perf:
    file_perf.write("Area under the ROC curve: " + str(AUC_ROC)
                    + "\nArea under Precision-Recall curve: " + str(AUC_prec_rec)
                    + "\nJaccard similarity score: " + str(jaccard_index)
                    + "\nF1 score (F-measure): " + str(F1_score)
                    + "\n\nConfusion matrix:"
                    + str(confusion)
                    + "\nACCURACY: " + str(accuracy)
                    + "\nSENSITIVITY: " + str(sensitivity)
                    + "\nSPECIFICITY: " + str(specificity)
                    + "\nPRECISION: " + str(precision))

# Summary
以上是生活随笔為你收集整理的Unet项目解析(4): ./src/RetinaNN_predict.py的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: SOCKET入门最简单的程序啊
- 下一篇: 使用临界段实现优化的进程间同步对象-原理