I want to build an ANN using opencv ANN MLP Classifier to predict MNIST. I use 350 epoch, but my network keep predict '9' even though I test it using the training image. Below is my code :
import cv2 import numpy as np import os from os import listdir import time
start = time.time()
create ANN
net = cv2.ml.ANN_MLP_create() net.setTrainMethod(cv2.ml.ANN_MLP_BACKPROP, 0.0001) net.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM) net.setLayerSizes(np.array([784, 100, 10])) net.setTermCriteria(( cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 100, 0.00001 ))
output
class_0 = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0] class_1 = [0, 1, 0, 0, 0, 0, 0, 0, 0, 0] class_2 = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0] class_3 = [0, 0, 0, 1, 0, 0, 0, 0, 0, 0] class_4 = [0, 0, 0, 0, 1, 0, 0, 0, 0, 0] class_5 = [0, 0, 0, 0, 0, 1, 0, 0, 0, 0] class_6 = [0, 0, 0, 0, 0, 0, 1, 0, 0, 0] class_7 = [0, 0, 0, 0, 0, 0, 0, 1, 0, 0] class_8 = [0, 0, 0, 0, 0, 0, 0, 0, 1, 0] class_9 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1] array_class = [class_0, class_1, class_2, class_3, class_4, class_5, class_6, class_7, class_8, class_9]
def image(sample, classification): return (np.array([sample], dtype=np.float32), np.array([classification], dtype=np.float32))
input
print("Processing the images..") images = [] os.chdir("dataset/mnist_png/training") for digit in range(0, 10): os.chdir(str(digit)) path = os.getcwd() for file in listdir(path): img = cv2.imread(file) img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_blurred = cv2.GaussianBlur(img, (7,7), 0)
ret, img_th = cv2.threshold(img, 0, 1, cv2.THRESH_BINARY_INV)
img_eroded = cv2.erode(img, np.ones((2,2), np.uint8), iterations = 2)
if(img_th.shape[0] != 28 or img_th.shape[1] != 28):
img_th = cv2.resize(img_th, (28, 28), interpolation = cv2.INTER_CUBIC)
images.append(image(img_th.ravel(), array_class[digit]))
os.chdir("../")
os.chdir("../")
train
print("Training..") epochs = 350 for e in range(0, epochs): tmp = 0 for data, cls in images: tmp += 1 print("Training image " + str(tmp) + " / 60000 (Epoch " + str(e+1) + "/" + str(epochs) + ")") net.train(data, cv2.ml.ROW_SAMPLE, cls)
test
print("Testing..") result_class = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
for count testing files
os.chdir("new_testing") path = os.getcwd() tmp = 0 for file in listdir(path): tmp += 1 testing_image = cv2.imread(file) testing_image = cv2.cvtColor(testing_image, cv2.COLOR_BGR2GRAY) ret, testing_image_th = cv2.threshold(testing_image, 0, 1, cv2.THRESH_BINARY) if(testing_image_th.shape[0] != 28 or testing_image_th.shape[1] != 28): testing_image_th = cv2.resize(testing_image_th, (28, 28), interpolation = cv2.INTER_CUBIC) print("Testing image " + str(tmp) + " / 10000") predict_class = int(net.predict(np.array([testing_image_th.flatten()], dtype=np.float32))[0]) print ("class predicted: " + str(predict_class)) if(str(predict_class) == os.path.splitext(file)[0].split('_')[0]): result_class[predict_class] += 1
a = 0 for x in range(0, 10): a += result_class[x]
print ("Accuracy: " + str(a/tmp))
end = time.time() print("Time to process: " + str(end - start))
Anything wrong with that?