Revision history [back]

Opencv Better Way to find contours for lighter text

I am trying to extract text from ID cards, I want to identify all the text areas, crop it and pass it into tesseract. Below is the code I now have.

class Transformer(object):
    def __init__(self):
        super().__init__()

    def __call__(self, gray, rgba):
        ret, image_binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

        kernel = np.ones((3, 3), np.uint8)
        image_closed = cv2.morphologyEx(image_binary, cv2.MORPH_CLOSE, kernel)
        image_open = cv2.morphologyEx(image_closed, cv2.MORPH_OPEN, kernel)

        kernel = np.ones((2, 2), np.uint8)
        image_dilate = cv2.erode(image_open, kernel, 5)

        image_not = cv2.bitwise_not(image_dilate)
        image_not = cv2.adaptiveThreshold(image_not, 255, cv2.ADAPTIVE_THRESH_MEAN_C,  cv2.THRESH_BINARY, 15, -2)

        image_dilate = cv2.dilate(image_not, np.ones((2, 1)))
        image_dilate = cv2.dilate(image_dilate, np.ones((1, 15)))


        image, contours, heirarchy = cv2.findContours(image_dilate, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            # if w > 30 and h > 10:
            cv2.rectangle(rgba, (x, y), (x + w, y + h), (0, 255, 0), 1)

        return image_dilate, rgbamage_dilate, rgba

This transforms image to look like this

image description

This works ok, but I will also like to capture the arabic text and cropped out regions, which is lighter. How do I achieve this.