Facies Classification Using Machine Learning
This post builds on Brendon Hall's 2016 article in The Leading Edge, "Facies classification using machine learning," in which Hall (2016) applies an SVM to facies classification. Here, the classification is implemented with a neural network instead.
Model (adapted from programming assignment 2.3.2 of Andrew Ng's Deep Learning Specialization, building a neural network with TensorFlow):
LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SOFTMAX
Splitting the dataset:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

training_data = pd.read_csv('training_data.csv')
test_data     = training_data[training_data['Well Name'] == 'SHANKLE']  # hold out one well from the training data for testing
training_data = training_data[training_data['Well Name'] != 'SHANKLE']  # remove that well from the training data

all_vectors = training_data.drop(['Facies', 'Formation', 'Well Name', 'Depth'], axis=1)
all_labels  = training_data['Facies'].values

nan_idx = np.any(np.isnan(all_vectors), axis=1)  # drop rows containing NaNs
training_vectors = all_vectors[np.logical_not(nan_idx)]
training_labels  = all_labels[np.logical_not(nan_idx)]

test_vectors = test_data.drop(['Facies', 'Formation', 'Well Name', 'Depth'], axis=1)
test_labels  = np.ones(test_vectors.shape[0], dtype=int)  # dummy labels; the true labels are kept separately below
test_labels_true = test_data['Facies'].values

scaler = preprocessing.StandardScaler().fit(training_vectors)
scaled_training_vectors = scaler.transform(training_vectors)
scaled_test_vectors = scaler.transform(test_vectors)  # as I understand it, the test set should be standardized with the training set's mean and variance

X_train, X_cv, Y_train, Y_cv = train_test_split(scaled_training_vectors, training_labels, test_size=0.05, random_state=42)
Transposing the data matrices:
X_train = X_train.T  # after this, each column is one sample
Y_train = Y_train.T
X_cv = X_cv.T
Y_cv = Y_cv.T
Converting the facies classes to the one-hot vectors TensorFlow expects:
Y_train = convert_to_one_hot(Y_train - 1, 9)  # the "-1" is because the class labels run 1-9 while the one-hot indices run 0-8
Y_cv = convert_to_one_hot(Y_cv - 1, 9)
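convert_to_one_hot comes from the course assignment's utility file; a minimal sketch of what it does, assuming the labels are a 1-D integer array:
def convert_to_one_hot(Y, C):
    # Build a (C, m) one-hot matrix: one column per sample,
    # with a 1 in the row given by the class index.
    return np.eye(C)[Y.reshape(-1)].T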
Creating placeholders:
import tensorflow as tf

X = tf.placeholder(tf.float32, shape=[n_x, None], name='X')
Y = tf.placeholder(tf.float32, shape=[n_y, None], name='Y')
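In the full model below, these placeholders are produced by a create_placeholders helper; a sketch along the lines of the assignment:
def create_placeholders(n_x, n_y):
    # n_x: number of input features, n_y: number of classes.
    # None in the second dimension lets the batch size vary.
    X = tf.placeholder(tf.float32, shape=[n_x, None], name='X')
    Y = tf.placeholder(tf.float32, shape=[n_y, None], name='Y')
    return X, Y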
Initializing the model parameters:
W1 = tf.get_variable("W1", [65, 7], initializer = tf.contrib.layers.xavier_initializer(seed=1))
b1 = tf.get_variable("b1", [65, 1], initializer = tf.zeros_initializer())
W2 = tf.get_variable("W2", [25, 65], initializer = tf.contrib.layers.xavier_initializer(seed=1))
b2 = tf.get_variable("b2", [25, 1], initializer = tf.zeros_initializer())
W3 = tf.get_variable("W3", [9, 25], initializer = tf.contrib.layers.xavier_initializer(seed=1))
b3 = tf.get_variable("b3", [9, 1], initializer = tf.zeros_initializer())
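The model function below calls an initialize_parameters() helper; wrapping the variables above into a dictionary, as in the assignment, would look roughly like:
def initialize_parameters():
    # Xavier initialization for weights, zeros for biases,
    # matching the layer sizes 7 -> 65 -> 25 -> 9 used above.
    W1 = tf.get_variable("W1", [65, 7], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b1 = tf.get_variable("b1", [65, 1], initializer=tf.zeros_initializer())
    W2 = tf.get_variable("W2", [25, 65], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b2 = tf.get_variable("b2", [25, 1], initializer=tf.zeros_initializer())
    W3 = tf.get_variable("W3", [9, 25], initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b3 = tf.get_variable("b3", [9, 1], initializer=tf.zeros_initializer())
    return {"W1": W1, "b1": b1, "W2": W2, "b2": b2, "W3": W3, "b3": b3}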
Forward propagation:
Z1 = tf.add(tf.matmul(W1,X),b1)
A1 = tf.nn.relu(Z1)
Z2 = tf.add(tf.matmul(W2,A1),b2)
A2 = tf.nn.relu(Z2)
Z3 = tf.add(tf.matmul(W3,A2),b3)
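Packaged as the forward_propagation helper the model calls below; a sketch:
def forward_propagation(X, parameters):
    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR; the softmax is
    # applied later inside the cost, not here.
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]
    W3, b3 = parameters["W3"], parameters["b3"]
    Z1 = tf.add(tf.matmul(W1, X), b1)
    A1 = tf.nn.relu(Z1)
    Z2 = tf.add(tf.matmul(W2, A1), b2)
    A2 = tf.nn.relu(Z2)
    Z3 = tf.add(tf.matmul(W3, A2), b3)
    return Z3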
Computing the cost function:
logits = tf.transpose(Z3)
labels = tf.transpose(Y)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels))
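Wrapped as the compute_cost helper used in the model; a sketch:
def compute_cost(Z3, Y):
    # tf.nn.softmax_cross_entropy_with_logits expects samples along
    # the first axis, hence the transposes.
    logits = tf.transpose(Z3)
    labels = tf.transpose(Y)
    return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))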
Backpropagation and the parameter update:
# backpropagation: one Adam step per call
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# parameter update: running the optimizer inside the session updates the weights
_, minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})
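The training loop in the full model below also relies on the assignment's random_mini_batches helper, which shuffles the sample columns and slices them into batches; a minimal sketch, assuming column-major samples:
import math

def random_mini_batches(X, Y, mini_batch_size=64, seed=0):
    # X: (n_x, m), Y: (n_y, m); returns a list of (X_batch, Y_batch) pairs.
    np.random.seed(seed)
    m = X.shape[1]
    permutation = list(np.random.permutation(m))
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation]
    mini_batches = []
    num_complete = math.floor(m / mini_batch_size)
    for k in range(num_complete):
        mini_batches.append((shuffled_X[:, k * mini_batch_size:(k + 1) * mini_batch_size],
                             shuffled_Y[:, k * mini_batch_size:(k + 1) * mini_batch_size]))
    if m % mini_batch_size != 0:  # last, smaller batch
        mini_batches.append((shuffled_X[:, num_complete * mini_batch_size:],
                             shuffled_Y[:, num_complete * mini_batch_size:]))
    return mini_batches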
Building the full model:
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops

def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001,
          num_epochs=4000, minibatch_size=512, print_cost=True):
    """Implements a three-layer TensorFlow neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX.

    Arguments:
    X_train -- training set, of shape (input size = 7, number of training examples = 2783)
    Y_train -- training labels, of shape (output size = 9, number of training examples = 2783)
    X_test -- test set, of shape (input size = 7, number of test examples = 449)
    Y_test -- test labels, of shape (output size = 9, number of test examples = 449)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost every 100 epochs

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """

    ops.reset_default_graph()                         # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)                             # to keep consistent results
    seed = 3                                          # to keep consistent results
    (n_x, m) = X_train.shape                          # (n_x: input size, m: number of examples in the train set)
    n_y = Y_train.shape[0]                            # n_y: output size
    costs = []                                        # to keep track of the cost

    # Create placeholders of shape (n_x, n_y)
    X, Y = create_placeholders(n_x, n_y)

    # Initialize parameters
    parameters = initialize_parameters()

    # Forward propagation: build the forward propagation in the tensorflow graph
    Z3 = forward_propagation(X, parameters)

    # Cost function: add the cost function to the tensorflow graph
    cost = compute_cost(Z3, Y)

    # Backpropagation: define the tensorflow optimizer. Use an AdamOptimizer.
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:

        # Run the initialization
        sess.run(init)

        # Do the training loop
        for epoch in range(num_epochs):
            epoch_cost = 0.                            # defines a cost related to an epoch
            num_minibatches = int(m / minibatch_size)  # number of minibatches of size minibatch_size in the train set
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)
            for minibatch in minibatches:
                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch

                # IMPORTANT: the line that runs the graph on a minibatch.
                # Run the session to execute the "optimizer" and the "cost";
                # the feed dict contains a minibatch for (X, Y).
                _, minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})

                epoch_cost += minibatch_cost / num_minibatches

            # Print the cost every 100 epochs, record it every 5
            if print_cost and epoch % 100 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost and epoch % 5 == 0:
                costs.append(epoch_cost)

        # Plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('epochs (per fives)')
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()

        # Save the trained parameters in a variable
        parameters = sess.run(parameters)
        print("Parameters have been trained!")

        # Calculate the correct predictions
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))

        # Calculate accuracy on the train and test sets
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))

        return parameters
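The original post does not show the training call itself; one plausible invocation, a sketch that evaluates on the cross-validation split prepared earlier (so the printed "Test Accuracy" is really CV accuracy):
parameters = model(X_train, Y_train, X_cv, Y_cv)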
Prediction:
X_test = scaled_test_vectors.T
Y_test_orig = test_labels_true.T
test_prediction = predict(X_test, parameters)  # one column per sample; vectorization keeps this fast
def predict(X, parameters):

    W1 = tf.convert_to_tensor(parameters["W1"])
    b1 = tf.convert_to_tensor(parameters["b1"])
    W2 = tf.convert_to_tensor(parameters["W2"])
    b2 = tf.convert_to_tensor(parameters["b2"])
    W3 = tf.convert_to_tensor(parameters["W3"])
    b3 = tf.convert_to_tensor(parameters["b3"])

    params = {"W1": W1,
              "b1": b1,
              "W2": W2,
              "b2": b2,
              "W3": W3,
              "b3": b3}

    x = tf.placeholder("float", [X.shape[0], X.shape[1]])

    z3 = forward_propagation_for_predict(x, params)
    p = tf.argmax(z3)

    sess = tf.Session()
    prediction = sess.run(p, feed_dict={x: X})

    return prediction
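forward_propagation_for_predict is not defined in the post; in the course's utility file it mirrors forward_propagation above, taking the tensor-converted params. A minimal sketch under that assumption:
def forward_propagation_for_predict(X, parameters):
    # Same LINEAR -> RELU -> LINEAR -> RELU -> LINEAR stack as in training;
    # the softmax is unnecessary for argmax predictions since it preserves order.
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]
    W3, b3 = parameters["W3"], parameters["b3"]
    Z1 = tf.add(tf.matmul(W1, X), b1)
    A1 = tf.nn.relu(Z1)
    Z2 = tf.add(tf.matmul(W2, A1), b2)
    A2 = tf.nn.relu(Z2)
    return tf.add(tf.matmul(W3, A2), b3)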
My test results are as follows:
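To turn the predictions into a score against the true SHANKLE labels, one option is scikit-learn's metrics (an illustrative sketch, not from the original post; note the +1 mapping the 0-based argmax indices back to facies labels 1-9):
from sklearn.metrics import accuracy_score, confusion_matrix

predicted_facies = test_prediction + 1  # argmax gives 0-8; facies labels are 1-9
print("SHANKLE accuracy:", accuracy_score(test_labels_true, predicted_facies))
print(confusion_matrix(test_labels_true, predicted_facies))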
This work drew on Brendon Hall's paper, Andrew Ng's deep learning course, and OliverChrist's blog; many thanks to them!