I'm using the cascade classifier with the frontal-face training data to detect faces in a still image (i.e., a static jpg). I don't want to search the whole image for faces at once, so I pass the cascade classifier an ROI cropped from the full image. What I've found is that the classifier's ability to find the face depends on the dimensions and position of the ROI I give it. Note that I only give it ROIs that contain the entire face, with plenty of margin around it.
I'm using OpenCV 3.0.0.
I have modified the OpenCV sample program to demonstrate the behavior. I started to test the new example code (obtained from: http://docs.opencv.org/master/db/d28/tutorial_cascade_classifier.html#gsc.tab=0), but that code can't find the face in my sample image at all (see attachment).
Sample image: C:\fakepath\faceDetectTestImage.png
Code embedded below:
I believe this might indicate a bug in the cascade classifier because I do not think the detection result should change based on the ROI cropping. If I'm wrong about that, I would love to understand why it does matter. Thanks!
#include "opencv2/objdetect.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/videoio.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/videoio/videoio_c.h"
#include "opencv2/highgui/highgui_c.h"
#include <cctype>
#include <iostream>
#include <iterator>
#include <stdio.h>
using namespace std;
using namespace cv;
/* Function Headers */
// Forward declaration; the frame is taken by value (a cv::Mat header copy).
void detectAndDisplay( Mat frame );
/* Global variables */
// Cascade file names handed to CascadeClassifier::load() in main(). They are
// relative paths, so they are resolved against the process working directory.
String face_cascade_name = "haarcascade_frontalface_alt.xml";
String eyes_cascade_name = "haarcascade_eye_tree_eyeglasses.xml";
// Classifier objects loaded in main() and used by detectAndDisplay().
CascadeClassifier face_cascade;
CascadeClassifier eyes_cascade;
// Title of the window passed to imshow().
String window_name = "Capture - Face detection";
/* @function main */
/* Mod begin */
// parameters used in adjusting the face detection search sub-region
// X/Y are used as the top-left corner of the search rectangle built in
// detectAndDraw(); the *Adj values are subtracted from the width/height of the
// (down-scaled) grayscale image to obtain the rectangle's size.
int gFaceSearchRoiX = 0;
int gFaceSearchRoiY = 0;
int gFaceSearchRoiWidthAdj = 0;
int gFaceSearchRoiHeightAdj = 0;
// Upper bound compared against gFaceSearchRoiHeightAdj before detectAndDraw()
// calls itself again.
// NOTE(review): no statement visible in this listing ever changes
// gFaceSearchRoiHeightAdj -- the increment was presumably lost; confirm.
const int kMaxFaceSearchRoiHeightAdjVal = 200;
/* Mod end */
static void help()
cout << "\nThis program demonstrates the cascade recognizer. Now you can use Haar or LBP features.\n"
"This classifier can recognize many kinds of rigid objects, once the appropriate classifier is trained.\n"
"It's most known use is for faces.\n"
"./facedetect [--cascade=<cascade_path> this is the primary trained classifier such as frontal face]\n"
" [--nested-cascade[=nested_cascade_path this an optional secondary classifier such as eyes]]\n"
" [--scale=<image scale greater or equal to 1, try 1.3 for example>]\n"
" [--try-flip]\n"
" [filename|camera_index]\n\n"
"see facedetect.cmd for one call:\n"
"./facedetect --cascade=\"../../data/haarcascades/haarcascade_frontalface_alt.xml\" --nested-cascade=\"../../data/haarcascades/haarcascade_eye.xml\" --scale=1.3\n\n"
"During execution:\n\tHit any key to quit.\n"
"\tUsing OpenCV version " << CV_VERSION << "\n" << endl;
// Forward declaration of the detection/drawing routine defined later in this
// file. The image and both classifiers are taken by non-const reference;
// `scale` and `tryflip` are the values parsed from --scale= and --try-flip.
void detectAndDraw( Mat& img, CascadeClassifier& cascade,
CascadeClassifier& nestedCascade,
double scale, bool tryflip );
// Default cascade file locations. main() overwrites them with the text that
// follows the --cascade= / --nested-cascade= command-line options.
string cascadeName = "../../data/haarcascades/haarcascade_frontalface_alt.xml";
string nestedCascadeName = "../../data/haarcascades/haarcascade_eye_tree_eyeglasses.xml";
// Program entry point.
//
// NOTE(review): this body is not compilable as shown. It looks like two
// different versions of main() interleaved line by line:
//   * one built around CvCapture / detectAndDraw() with --cascade=,
//     --nested-cascade, --scale= and --try-flip option parsing, and
//   * one built around VideoCapture / detectAndDisplay() that loads
//     face_cascade and eyes_cascade and reads frames from a camera.
// Visible symptoms: the enclosing braces are missing, `capture` and `frame` are
// each declared twice with different types, `i` and `tryflip` are used without
// a visible declaration, one string literal is split by a `//--` comment, and
// one `if` condition is malformed. The code tokens below are left untouched;
// only comments were added.
//
// What the visible statements do, in order of appearance:
//   - load the face and eye cascades, returning -1 if either load fails;
//   - copy argv[1] into inputName (argc is not checked first), read that file
//     with cv::imread and pass the result to detectAndDisplay();
//   - parse the command-line options into cascadeName, nestedCascadeName,
//     scale and tryflip;
//   - depending on inputName, open a camera, an image, or a video file, and
//     run detectAndDraw() on each frame/image;
//   - otherwise treat inputName as a text file with one image path per line.
int main( int argc, const char** argv )
CvCapture* capture = 0;
Mat frame, frameCopy, image;
const string scaleOpt = "--scale=";
size_t scaleOptLen = scaleOpt.length();
const string cascadeOpt = "--cascade=";
size_t cascadeOptLen = cascadeOpt.length();
const string nestedCascadeOpt = "--nested-cascade";
size_t nestedCascadeOptLen = nestedCascadeOpt.length();
const string tryFlipOpt = "--try-flip";
size_t tryFlipOptLen = tryFlipOpt.length();
string inputName;
/* Mod end */
// NOTE(review): second declaration of `capture` and `frame` (see header note).
VideoCapture capture;
Mat frame;
//-- 1. Load the cascades
if( !face_cascade.load( face_cascade_name ) ){ printf("--(!)Error loading face cascade\n"); return -1; };
if( !eyes_cascade.load( eyes_cascade_name ) ){ printf("--(!)Error loading eyes cascade\n"); return -1; };
//-- 2. Read in the image data
// NOTE(review): argv[1] is read without checking argc.
inputName.assign( argv[1] );
// get image from provided file
/* Mod begin */
// NOTE(review): assigning `false` to a Mat is presumably a mangled
// `bool tryflip = false;` -- confirm against the author's original file.
frame = false;
CascadeClassifier cascade, nestedCascade;
double scale = 1;
frame = cv::imread(inputName);
detectAndDisplay( frame );
/* Mod end */
detectAndDisplay( frame );
/*** Mod end */
// get image stream from first camera device
capture.open( -1 );
if ( ! capture.isOpened() ) { printf("--(!)Error opening video capture\n"); return -1; }
while ( capture.read(frame) )
cout << "Processing " << i << " " << argv[i] << endl;
if ( frame.empty() )
cascadeName.assign( argv[i] + cascadeOptLen );
cout << " from which we have cascadeName= " << cascadeName << endl;
printf(" --(!) No captured frame -- Break!");
else if( nestedCascadeOpt.compare( 0, nestedCascadeOptLen, argv[i], nestedCascadeOptLen ) == 0 )
if( argv[i][nestedCascadeOpt.length()] == '=' )
nestedCascadeName.assign( argv[i] + nestedCascadeOpt.length() + 1 );
if( !nestedCascade.load( nestedCascadeName ) )
cerr << "WARNING: Could not load
//-- 3. Apply the classifier cascade for nested objects" << endl;
else if( scaleOpt.compare( 0, scaleOptLen, argv[i], scaleOptLen ) == 0 )
// A value that fails to parse or is below 1 falls back to a scale of 1.
if( !sscanf( argv[i] + scaleOpt.length(), "%lf", &scale ) || scale < 1 )
scale = 1;
cout << " from which we read scale = " << scale << endl;
else if( tryFlipOpt.compare( 0, tryFlipOptLen, argv[i], tryFlipOptLen ) == 0 )
tryflip = true;
cout << " will try to flip image horizontally to detect assymetric objects\n";
else if( argv[i][0] == '-' )
cerr << "WARNING: Unknown option %s" << argv[i] << endl;
inputName.assign( argv[i] );
if( !cascade.load( cascadeName ) )
cerr << "ERROR: Could not load classifier cascade" << endl;
return -1;
// An empty name or a single digit is treated as a camera index.
if( inputName.empty() || (isdigit(inputName.c_str()[0]) && inputName.c_str()[1] == '\0') )
capture = cvCaptureFromCAM( inputName.empty() ? 0 : inputName.c_str()[0] - '0' );
the frame
detectAndDisplay( frame );
int c = inputName.empty() ? 0 : inputName.c_str()[0] - '0' ;
if(!capture) cout << "Capture from CAM " << c << " didn't work" << endl;
else waitKey(10);
if( inputName.size() )
// Try the name as an image first; if that yields nothing, try it as a video.
image = imread( inputName, 1 );
if( image.empty() )
capture = cvCaptureFromAVI( inputName.c_str() );
if(!capture) cout << "Capture from AVI didn't work" << endl;
image = imread( "../data/lena.jpg", 1 );
if(image.empty()) cout << "Couldn't read ../data/lena.jpg" << endl;
cvNamedWindow( "result", 1 );
if( capture )
cout << "In capture ..." << endl;
IplImage* iplImg = cvQueryFrame( capture );
frame = cv::cvarrToMat(iplImg);
if( frame.empty() )
// Frames whose origin is not top-left are flipped vertically before detection.
if( iplImg->origin == IPL_ORIGIN_TL )
frame.copyTo( frameCopy );
flip( frame, frameCopy, 0 );
detectAndDraw( frameCopy, cascade, nestedCascade, scale, tryflip );
if( waitKey( 10 ) >= 0 )
goto _cleanup_;
cvReleaseCapture( &capture );
cout << "In image read" << endl;
if( !image.empty() )
detectAndDraw( image, cascade, nestedCascade, scale, tryflip );
else if( !inputName.empty() )
/* assume it is a text file containing the
list of the image filenames to be processed - one per line */
FILE* f = fopen( inputName.c_str(), "rt" );
if( f )
char buf[1000+1];
while( fgets( buf, 1000, f ) )
int len = (int)strlen(buf), c;
// NOTE(review): the body of this trimming loop (presumably `len--;`) is not
// visible in this listing; as written, `len` never changes.
while( len > 0 && isspace(buf[len-1]) )
buf[len] = '\0';
cout << "file " << buf << endl;
image = imread( buf, 1 );
if( !image.empty() )
detectAndDraw( image, cascade, nestedCascade, scale, tryflip );
c = waitKey(0);
if( c (char)c == 27 || c == 'q' || c == 'Q' )
) { break; } // escape
cerr << "Aw snap, couldn't read image " << buf << endl;
return 0;
/* @function detectAndDisplay */
// Face detection and drawing.
//
// NOTE(review): this body is not compilable as shown. It looks like two
// functions interleaved line by line:
//   * detectAndDraw(img, cascade, nestedCascade, scale, tryflip), which
//     down-scales the image, restricts detection to a search rectangle built
//     from the gFaceSearchRoi* globals, and draws the results; and
//   * detectAndDisplay(frame), which runs face_cascade on the whole
//     equalized grayscale frame and eyes_cascade inside each detected face.
// Visible symptoms: two signatures (the first is cut off after its third
// parameter), a stray `int {`, and several statements spliced mid-line (the
// `for` header / resize() call, the ellipse() call, the radius computation and
// the circle() call). The code tokens below are left untouched; only comments
// were added.
void detectAndDraw( Mat& img, CascadeClassifier& cascade,
CascadeClassifier& nestedCascade,
void detectAndDisplay( Mat frame )
{
int {
std::vector<Rect> faces;
Mat frame_gray;
cvtColor( frame, frame_gray, COLOR_BGR2GRAY );
equalizeHist( frame_gray, frame_gray );
//-- Detect faces
face_cascade.detectMultiScale( frame_gray, faces, 1.1, 2, 0|CASCADE_SCALE_IMAGE, Size(30, 30) );
for( size_t i = 0;
double t = 0;
vector<Rect> faces, faces2;
// NOTE(review): only two colors are visible here, yet the code further down
// indexes this array with `i%8`; the initializer was presumably truncated.
const static Scalar colors[] = { CV_RGB(0,0,255),
CV_RGB(255,0,255)} ;
// smallImg is allocated with the image's rows and columns each divided by `scale`.
Mat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
cvtColor( img, gray, COLOR_BGR2GRAY );
resize( gray, smallImg, smallImg.size(), 0; i < faces.size(); i++ )
Point center( faces[i].x + faces[i].width/2, faces[i].y + faces[i].height/2 );
ellipse( frame, center, Size( faces[i].width/2, faces[i].height/2), 0, 0, INTER_LINEAR );
// Histogram equalization is applied to the full down-scaled image, before the
// search rectangle is cropped out of it.
equalizeHist( smallImg, smallImg );
/* Mod begin */
// Search rectangle: origin from gFaceSearchRoiX/Y, size equal to smallImg's
// size minus the width/height adjustments.
int faceSearchRoiX 360, Scalar( 255, 0, 255 ), 4, 8, 0 );
Mat faceROI = gFaceSearchRoiX;
int faceSearchRoiY = gFaceSearchRoiY;
int faceSearchRoiWidth = smallImg.size().width - gFaceSearchRoiWidthAdj;
int faceSearchRoiHeight = smallImg.size().height - gFaceSearchRoiHeightAdj;
Rect faceSearchRoi = Rect(faceSearchRoiX, faceSearchRoiY,
faceSearchRoiWidth, faceSearchRoiHeight);
Mat subSmallImg = Mat(Size(faceSearchRoi.width, faceSearchRoi.height),
// Mat(smallImg, rect) makes subSmallImg a view onto smallImg's pixels rather
// than a copy.
subSmallImg = Mat(smallImg, faceSearchRoi);
/* Mod end */
t = (double)cvGetTickCount();
// The cropped sub-image, not the full smallImg, is what the classifier sees;
// the rectangles it returns are therefore relative to the crop's origin.
cascade.detectMultiScale( subSmallImg, faces,
frame_gray( faces[i] );
std::vector<Rect> eyes;
//-- In each face, detect eyes
eyes_cascade.detectMultiScale( faceROI, eyes, 1.1, 2, 0
0 |CASCADE_SCALE_IMAGE, Size(30, 30) );
for( size_t j = 0; j < eyes.size(); j++ )
// Second pass on the horizontally mirrored image; hits are mirrored back into
// un-flipped coordinates before being appended to `faces`.
// NOTE(review): this pass runs on the full smallImg, not on subSmallImg.
flip(smallImg, smallImg, 1);
cascade.detectMultiScale( smallImg, faces2,
1.1, 2, 0
Size(30, 30) );
for( vector<Rect>::const_iterator r = faces2.begin(); r != faces2.end(); r++ )
faces.push_back(Rect(smallImg.cols - r->x - r->width, r->y, r->width, r->height));
t = (double)cvGetTickCount() - t;
// printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
/* Mod begin */
// output to console
// NOTE(review): the height reported here is computed from img's height, while
// the search rectangle's height is computed from smallImg's height; the two
// differ whenever scale != 1.
if(faces.size() == 0)
cout << "No faces found with search ROI height set to " << img.size().height - gFaceSearchRoiHeightAdj
<< " pixels.\n";
// draw ROI
// NOTE(review): no visible statement gives displayImg any pixel data before it
// is drawn on; a copy from img was presumably lost.
cv::Mat displayImg;
// Map the search rectangle from smallImg coordinates back to full-image
// coordinates by multiplying by `scale`.
Rect scaledFaceSearchRoi = faceSearchRoi;
scaledFaceSearchRoi.x *= scale;
scaledFaceSearchRoi.y *= scale;
scaledFaceSearchRoi.width *= scale;
scaledFaceSearchRoi.height *= scale;
rectangle(displayImg, scaledFaceSearchRoi, Scalar(0,0,255), 2);
/* Mod end */
for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
Mat smallImgROI;
vector<Rect> nestedObjects;
Point center;
Scalar color = colors[i%8];
eye_center( faces[i].x + eyes[j].x + eyes[j].width/2, faces[i].y + eyes[j].y + eyes[j].height/2 );
int radius;
// Near-square detections are drawn as circles, others as rectangles.
double aspect_ratio = (double)r->width/r->height;
if( 0.75 < aspect_ratio && aspect_ratio < 1.3 )
// The search rectangle's origin is added back (scaled) so that the circle
// lands on full-image coordinates.
center.x = (scale * faceSearchRoiX) + cvRound((r->x + r->width*0.5)*scale);
center.y = (scale * faceSearchRoiY) + cvRound((r->y + r->height*0.5)*scale);
radius = cvRound((r->width cvRound( (eyes[j].width + r->height)*0.25*scale);
eyes[j].height)*0.25 );
circle( displayImg, center, frame, eye_center, radius, color, 3, Scalar( 255, 0, 0 ), 4, 8, 0 );
// NOTE(review): unlike the circle case above, these corner coordinates do not
// add the search rectangle's origin.
rectangle( img, cvPoint(cvRound(r->x*scale), cvRound(r->y*scale)),
cvPoint(cvRound((r->x + r->width-1)*scale), cvRound((r->y + r->height-1)*scale)),
color, 3, 8, 0);
if( nestedCascade.empty() )
// NOTE(review): *r is used to index smallImg directly, with no search-rectangle
// offset applied.
smallImgROI = smallImg(*r);
nestedCascade.detectMultiScale( smallImgROI, nestedObjects,
1.1, 2, 0
Size(30, 30) );
for( vector<Rect>::const_iterator nr = nestedObjects.begin(); nr != nestedObjects.end(); nr++ )
center.x = cvRound((r->x + nr->x + nr->width*0.5)*scale);
center.y = cvRound((r->y + nr->y + nr->height*0.5)*scale);
radius = cvRound((nr->width + nr->height)*0.25*scale);
circle( img, center, radius, color, 3, 8, 0 );
/* Mod begin */
imshow( "result", frame );
// Re-runs detection while the height adjustment is below its maximum.
// NOTE(review): no visible statement changes gFaceSearchRoiHeightAdj, so as
// shown this condition never becomes false; the increment was presumably lost.
if(gFaceSearchRoiHeightAdj < kMaxFaceSearchRoiHeightAdjVal)
detectAndDraw( img, cascade, nestedCascade, scale, tryflip );
/* Mod end */
//-- Show what you got
imshow( window_name, frame );