Detecting articles in a newspaper image using OpenCV?
I tried to do this using dilation; the code I used is below:
#include "stdafx.h"
#include <fstream>
#include <iostream>
#include "opencv2/opencv.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
using namespace cv;
using namespace std;
int main(int argc, char* argv[]) {
Mat matImage = imread("Images/newspaper2.jpeg");
if (!matImage.data) {
cout << "Unable to open the file\n" << endl;
return 1;
}
int iterations=5;
Point anchor;
Mat grayImage;
Mat threshImage;
Mat dilatedImage;
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
cvtColor(matImage, grayImage, COLOR_RGB2GRAY);
threshold(grayImage, threshImage, 150, 255, THRESH_BINARY_INV);
Mat element = getStructuringElement(MORPH_CROSS,Size(3,3));
dilate(threshImage, dilatedImage, element, anchor = Point(-1, -1), iterations);
findContours(dilatedImage, contours, hierarchy, RETR_EXTERNAL, CHAIN_APPROX_NONE, Point(0, 0));
vector<Rect> boundRect(contours.size());
for(int contour = 0; contour < contours.size(); contour++){
boundRect[contour] = boundingRect(contours[contour]);
rectangle(dilatedImage, boundRect[contour].tl(), boundRect[contour].br(), (255,0,255), 2);
}
imshow("Contours",dilatedImage);
imwrite("Output.png", dilatedImage);
waitKey(0);
return 0;
}
The output I got was not satisfying. This is my input image, but my output is this.
This is not what I wanted. I then came across another approach in the research paper "Logical segmentation for article extraction in newspapers", in which the authors used a CRF (conditional random field) model and labeled the headings, paragraphs, and text lines. You can see their approach in the following image.
My problem is that I don't know where to start with this approach, or how to use a CRF. Can anyone please help me find a way forward, or suggest some other approach?