基于小波分解和卷积神经网络的图像识别研究

本文是图像识别领域的“Hello World”级入门示例,内容比较简单,目的是探索小波变换对 CNN 图像识别的影响。所用的数据集也是很简单的 MNIST 数据集,由 70000 张 0 到 9 的手写数字灰度图像组成,每张图像为 28 x 28 像素,其中训练集 60000 张图像,测试集 10000 张图像。

首先导入相关模块

import random
import timeit

import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pywt
import seaborn as sns
import sklearn
import tensorflow as tf
import tensorflow.keras
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Dense, Conv2D, Flatten, Dropout, BatchNormalization, MaxPooling2D
from keras.models import Sequential
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from tensorflow.keras.utils import to_categorical

接着加载数据

# Seed every random number generator the experiment touches. Seeding only
# Python's `random` module leaves the NumPy and TensorFlow generators
# unseeded, so runs would not be repeatable.
random.seed(666)
np.random.seed(666)
tf.random.set_seed(666)

# Load MNIST as (images, labels) pairs for the train and test splits.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

由于图像已经是灰度图并且都具有相同的尺寸,因此直接将像素值归一化

# Rescale pixel intensities by 1/255 (assumes the raw images use the usual
# 0..255 range, which maps them onto [0, 1]). No resizing or colour
# conversion is done here.
X_train, X_test = (images / 255.0 for images in (x_train, x_test))

可视化部分数据

# Preview the first 15 training images in a 3 x 5 grid, each titled with
# its ground-truth label.
fig = plt.figure(figsize=(20, 8))
for slot, (image, label) in enumerate(zip(X_train[:15], y_train), start=1):
    ax = fig.add_subplot(3, 5, slot)
    ax.imshow(image, cmap="Greys")
    ax.set_title('Value = ' + str(label), fontsize=10)
    # Tick marks carry no information for image thumbnails.
    ax.set_xticks([])
    ax.set_yticks([])
fig.tight_layout()
plt.show()

卷积神经网络对原始图像进行分类,首先进行输入输出设置

# Conv2D expects a channel axis: turn (N, H, W) into (N, H, W, 1).
X_train_conv = np.expand_dims(X_train, axis=-1)
X_test_conv = np.expand_dims(X_test, axis=-1)

# One-hot encode the integer labels to match the categorical cross-entropy
# loss used by the models.
y_train_conv, y_test_conv = to_categorical(y_train), to_categorical(y_test)

然后构建一个简单的卷积神经网络

# Baseline CNN trained on the original-resolution images.
# Layout: two 128-filter convolutions -> batch norm -> dropout -> 2x2 max
# pooling -> three more convolutions (64/32/64 filters) -> dense head with
# a 10-way softmax output.
model = Sequential([
    Conv2D(128, kernel_size=3, activation='relu', padding='valid',
           input_shape=X_train_conv.shape[1:]),
    Conv2D(128, kernel_size=3, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    MaxPooling2D((2, 2)),
    Conv2D(64, kernel_size=3, activation='relu'),
    Conv2D(32, kernel_size=3, activation='relu'),
    Conv2D(64, kernel_size=3, activation='relu'),
    Flatten(),
    Dense(10, activation='relu'),
    Dense(100, activation='relu'),
    Dense(10, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

如果模型没有改善就停止训练,并保存最好的模型

# Train the baseline CNN, keeping only the checkpoint with the lowest
# validation loss and stopping after 10 epochs without improvement.
#
# The checkpoint uses the `.h5` suffix: Keras 3's ModelCheckpoint only accepts
# `.keras` or `.h5` for full-model checkpoints and raises ValueError for
# `.hdf5`, while `.h5` is also understood by older Keras releases.
filepath = 'best_model.h5'
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
checkpoint = ModelCheckpoint(
    filepath=filepath,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)

# Wall-clock the fit so the three experiments can be compared on training time.
start = timeit.default_timer()
model_history = model.fit(
    X_train_conv,
    y_train_conv,
    epochs=50,
    batch_size=128,
    validation_split=0.3,  # hold out 30% of the training data for validation
    callbacks=[earlyStopping, checkpoint],
)
stop = timeit.default_timer()
print('Time to train the model: ', stop - start)

看一下训练曲线

# Plot the baseline model's training vs. validation curves, one figure for
# accuracy and one for loss.
for key, title, y_label in (
    ('accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'Model loss', 'Loss'),
):
    plt.plot(model_history.history[key])
    plt.plot(model_history.history['val_' + key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

加载最佳模型并查看准确率

# Replace the in-memory model with the best checkpoint written during
# training, then score it on the held-out test set.
model = keras.models.load_model(filepath)
acc = model.evaluate(X_test_conv, y_test_conv)

# `acc` is [loss, accuracy]; report the accuracy as a percentage.
print(100 * acc[1])

绘制混淆矩阵

# Turn the softmax outputs into predicted class indices.
preds = np.argmax(model.predict(X_test_conv), axis=1)

# Confusion matrix of true vs. predicted digits for the baseline model.
f, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(
    confusion_matrix(y_test, preds),
    annot=True,
    linewidths=0.01,
    cmap="Greens",
    linecolor="gray",
    fmt='.0f',
    ax=ax,
)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion matrix for original images")
plt.show()

可视化一些模型判别错误的例子

# Boolean mask of the test samples the baseline model got wrong.
errors = y_test != preds
img_errors = X_test[errors]
correct_labels = y_test[errors]
incorrect_labels = preds[errors]

# Show up to 15 misclassified digits with their true and predicted labels.
fig = plt.figure(figsize=(20, 8))
for slot, (image, truth, guess) in enumerate(
    zip(img_errors[:15], correct_labels, incorrect_labels), start=1
):
    ax = fig.add_subplot(3, 5, slot)
    ax.imshow(image, cmap="Greys")
    ax.set_title('Correct = ' + str(truth) + '  Predict = ' + str(guess), fontsize=10)
    ax.set_xticks([])
    ax.set_yticks([])
fig.tight_layout()
plt.show()

下面使用简单的Haar小波对图像进行分解,以第1张图像做示例,进行一层Haar小波分解后可以获得 4 个图像(原始图像的一半大小:28 x 28 -> 14 x 14),分别为近似图像、水平、垂直和对角线细节图像。

# Show the first training image on its own before decomposing it.
plt.imshow(X_train[0], cmap="Greys")
plt.show()

titles = ['Original', 'Approximation', ' Horizontal detail', 'Vertical detail', 'Diagonal detail']

# Single-level 2-D Haar transform. pywt returns the approximation followed by
# a (horizontal, vertical, diagonal) tuple of detail coefficients.
coeffs2 = pywt.dwt2(X_train[0], 'haar', mode='periodization')
LL0, (LH0, HL0, HH0) = coeffs2

# Lay the original and its four sub-bands side by side.
fig = plt.figure(figsize=(12, 3))
for slot, (panel, caption) in enumerate(
    zip([X_train[0], LL0, LH0, HL0, HH0], titles), start=1
):
    ax = fig.add_subplot(1, 5, slot)
    ax.imshow(panel, interpolation="nearest", cmap=plt.cm.gray)
    ax.set_title(caption, fontsize=10)
    ax.set_xticks([])
    ax.set_yticks([])
fig.tight_layout()
plt.show()

然后对所有图片进行变换

# Level-1 Haar approximation (LL sub-band) of every training and test image.
# pywt.dwt2 returns (approximation, (horizontal, vertical, diagonal)); only
# the approximation is kept, so the detail coefficients are never bound to
# throwaway names.
LL_train = [pywt.dwt2(img, 'haar', mode='periodization')[0] for img in X_train]
LL_test = [pywt.dwt2(img, 'haar', mode='periodization')[0] for img in X_test]

使用第一层小波分解得到的近似图像训练卷积神经网络

# Stack the level-1 approximation images into one array per split and add the
# trailing channel axis that Conv2D expects.
LL_train_conv = np.expand_dims(np.array(LL_train), axis=-1)
LL_test_conv = np.expand_dims(np.array(LL_test), axis=-1)

# CNN for the level-1 approximations. Same layout as the baseline network
# except that the final 64-filter convolution is dropped.
model_1 = Sequential([
    Conv2D(128, kernel_size=3, activation='relu', padding='valid',
           input_shape=LL_train_conv.shape[1:]),
    Conv2D(128, kernel_size=3, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    MaxPooling2D((2, 2)),
    Conv2D(64, kernel_size=3, activation='relu'),
    Conv2D(32, kernel_size=3, activation='relu'),
    Flatten(),
    Dense(10, activation='relu'),
    Dense(100, activation='relu'),
    Dense(10, activation='softmax'),
])
model_1.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model_1.summary()

训练模型

# Train the level-1 wavelet CNN, keeping only the checkpoint with the lowest
# validation loss and stopping after 10 epochs without improvement.
#
# The checkpoint uses the `.h5` suffix: Keras 3's ModelCheckpoint only accepts
# `.keras` or `.h5` for full-model checkpoints and raises ValueError for
# `.hdf5`, while `.h5` is also understood by older Keras releases.
filepath = 'best_model_wv.h5'
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
checkpoint = ModelCheckpoint(
    filepath=filepath,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)

# Wall-clock the fit so training time can be compared across experiments.
start = timeit.default_timer()
history_model1 = model_1.fit(
    LL_train_conv,
    y_train_conv,
    epochs=50,
    batch_size=128,
    validation_split=0.3,  # hold out 30% of the training data for validation
    callbacks=[earlyStopping, checkpoint],
)
stop = timeit.default_timer()
print('Time to train the model: ', stop - start)

绘制训练曲线

# Plot the level-1 wavelet model's training vs. validation curves, one figure
# for accuracy and one for loss.
for key, title, y_label in (
    ('accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'Model loss', 'Loss'),
):
    plt.plot(history_model1.history[key])
    plt.plot(history_model1.history['val_' + key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

# Reload the best level-1 wavelet checkpoint and score it on the test set.
model_1 = keras.models.load_model(filepath)
acc_LL = model_1.evaluate(LL_test_conv, y_test_conv)

# `acc_LL` is [loss, accuracy]; report the accuracy as a percentage.
print(100 * acc_LL[1])

绘制混淆矩阵

# Turn the softmax outputs into predicted class indices.
preds_LL = np.argmax(model_1.predict(LL_test_conv), axis=1)

# Confusion matrix of true vs. predicted digits for the level-1 wavelet model.
f, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(
    confusion_matrix(y_test, preds_LL),
    annot=True,
    linewidths=0.01,
    cmap="Greens",
    linecolor="gray",
    fmt='.0f',
    ax=ax,
)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion matrix for Haar wavelet aproximations")
plt.show()

可视化一些模型判别错误的例子

# Boolean mask of the test samples the level-1 wavelet model got wrong.
errors_2 = y_test != preds_LL
# The failures are displayed using the normalised original images (X_test),
# not the wavelet approximations that were fed to the model.
img_errors_2 = X_test[errors_2]
correct_labels_2 = y_test[errors_2]
incorrect_labels_2 = preds_LL[errors_2]

# Show up to 15 misclassified digits with their true and predicted labels.
fig = plt.figure(figsize=(20, 8))
for slot, (image, truth, guess) in enumerate(
    zip(img_errors_2[:15], correct_labels_2, incorrect_labels_2), start=1
):
    ax = fig.add_subplot(3, 5, slot)
    ax.imshow(image, cmap="Greys")
    ax.set_title('Correct = ' + str(truth) + '  Predict = ' + str(guess), fontsize=10)
    ax.set_xticks([])
    ax.set_yticks([])
fig.tight_layout()
plt.show()

下面使用Haar小波对图片进行2层分解,近似图像大小为7 x 7。

# Show the level-1 approximation of the first image before decomposing it again.
plt.imshow(LL0, cmap="Greys")
plt.show()

titles = ['First approximation','Second approximation', ' Second horizontal detail', 'Second vertical detail', 'Second diagonal detail']

# Apply the Haar transform a second time, now to the level-1 approximation.
coeffs2_2 = pywt.dwt2(LL0, 'haar', mode='periodization')
LL0_2, (LH0_2, HL0_2, HH0_2) = coeffs2_2

# Lay the level-1 approximation and its four level-2 sub-bands side by side.
fig = plt.figure(figsize=(12, 3))
for slot, (panel, caption) in enumerate(
    zip([LL0, LL0_2, LH0_2, HL0_2, HH0_2], titles), start=1
):
    ax = fig.add_subplot(1, 5, slot)
    ax.imshow(panel, interpolation="nearest", cmap=plt.cm.gray)
    ax.set_title(caption, fontsize=10)
    ax.set_xticks([])
    ax.set_yticks([])
fig.tight_layout()
plt.show()

对所有图像进行2层小波分解

# Level-2 Haar approximation: transform each level-1 approximation once more
# and keep only the LL sub-band. The detail coefficients are not needed, so
# they are never bound to throwaway names.
LL_train_2 = [pywt.dwt2(img, 'haar', mode='periodization')[0] for img in LL_train]
LL_test_2 = [pywt.dwt2(img, 'haar', mode='periodization')[0] for img in LL_test]

# Stack the images into one array per split and add the trailing channel axis
# that Conv2D expects.
LL_train_2_conv = np.expand_dims(np.array(LL_train_2), axis=-1)
LL_test_2_conv = np.expand_dims(np.array(LL_test_2), axis=-1)

# Smaller CNN for the level-2 approximations: two convolutions with batch
# norm and dropout in between, no pooling, then the same dense head as the
# other models.
model_2 = Sequential([
    Conv2D(128, kernel_size=3, activation='relu', padding='valid',
           input_shape=LL_train_2_conv.shape[1:]),
    BatchNormalization(),
    Dropout(0.2),
    Conv2D(32, kernel_size=3, activation='relu'),
    Flatten(),
    Dense(10, activation='relu'),
    Dense(100, activation='relu'),
    Dense(10, activation='softmax'),
])
model_2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model_2.summary()

# Train the level-2 wavelet CNN, keeping only the checkpoint with the lowest
# validation loss and stopping after 10 epochs without improvement.
#
# The checkpoint uses the `.h5` suffix: Keras 3's ModelCheckpoint only accepts
# `.keras` or `.h5` for full-model checkpoints and raises ValueError for
# `.hdf5`, while `.h5` is also understood by older Keras releases.
filepath = 'best_model_wv_2.h5'
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
checkpoint = ModelCheckpoint(
    filepath=filepath,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)

# Wall-clock the fit so training time can be compared across experiments.
start = timeit.default_timer()
history_model2 = model_2.fit(
    LL_train_2_conv,
    y_train_conv,
    epochs=50,
    batch_size=128,
    validation_split=0.3,  # hold out 30% of the training data for validation
    callbacks=[earlyStopping, checkpoint],
)
stop = timeit.default_timer()
print('Time to train the model: ', stop - start)

# Training vs. validation curves, one figure for accuracy and one for loss.
for key, title, y_label in (
    ('accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'Model loss', 'Loss'),
):
    plt.plot(history_model2.history[key])
    plt.plot(history_model2.history['val_' + key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

# Reload the best level-2 wavelet checkpoint and score it on the test set.
model_2 = keras.models.load_model(filepath)
acc_LL_2 = model_2.evaluate(LL_test_2_conv, y_test_conv)

# `acc_LL_2` is [loss, accuracy]; report the accuracy as a percentage.
print(100 * acc_LL_2[1])

# Turn the softmax outputs into predicted class indices.
preds_LL_2 = np.argmax(model_2.predict(LL_test_2_conv), axis=1)

# Confusion matrix of true vs. predicted digits for the level-2 wavelet model.
f, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(
    confusion_matrix(y_test, preds_LL_2),
    annot=True,
    linewidths=0.01,
    cmap="Greens",
    linecolor="gray",
    fmt='.0f',
    ax=ax,
)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion matrix for Haar wavelet second aproximations")
plt.show()

# Boolean mask of the test samples the level-2 wavelet model got wrong.
errors_3 = y_test != preds_LL_2
# The failures are displayed using the normalised original images (X_test),
# not the wavelet approximations that were fed to the model.
img_errors_3 = X_test[errors_3]
correct_labels_3 = y_test[errors_3]
incorrect_labels_3 = preds_LL_2[errors_3]

# Show up to 15 misclassified digits with their true and predicted labels.
fig = plt.figure(figsize=(20, 8))
for slot, (image, truth, guess) in enumerate(
    zip(img_errors_3[:15], correct_labels_3, incorrect_labels_3), start=1
):
    ax = fig.add_subplot(3, 5, slot)
    ax.imshow(image, cmap="Greys")
    ax.set_title('Correct = ' + str(truth) + '  Predict = ' + str(guess), fontsize=10)
    ax.set_xticks([])
    ax.set_yticks([])
fig.tight_layout()
plt.show()

详细的文章请见:

基于小波分解和卷积神经网络的图像识别研究 - 哥廷根数学学派的文章 - 知乎 https://zhuanlan.zhihu.com/p/554876956

发表评论
留言与评论(共有 0 条评论) “”
   
验证码:

相关文章

推荐文章