Ask Your Question

Revision history [back]

click to hide/show revision 1
initial version

OpenCL BruteForceMatcher slow and faulty

Hello. I want to match pictures that differ greatly from one another, and after lots of tests I found the combination ORB+FREAK to be the best for that. I need at least 4000 keypoints to get reliable results across the images.

The problem is the computation time. I want to cut down on it as much as possible and for that looked into OpenCVs OpenCL implementation (since CUDA only works for NVidia cards, it's not an option for me).

However, not only is the BruteForceMatcher slower than the CPU by a factor of 1.8, it also finds fewer matches. And that is something that shouldn't be possible.

My machine specs: Core 2 Duo E7300 @2,66GHz

Geforce 9500GT (not the fastest, but it shouldn't be this much slower!)

Windows XP 32Bit 4GB Ram (not all usable, but for this small application irrelevant) Visual Studio 2010 with compiled OpenCL module in release mode

The following is some example code. Parts of it are from the example in the OpenCV book. The important stuff happens in the main method. Pass the compiled exe two files and it will try to match them and outputs the result. To change between CPU/GPU, (un)comment the #define.

The images I used for comparison are first and second

The CPU version matches in 0.815s and finds 1530 matches.

The GPU version matches in 1.52s and finds 906 matches.

I am using OpenCV 2.4.3.

Why is the BruteForceMatcher_OCL not working correctly? Have others used it, and if yes, what are your results?

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/nonfree/features2d.hpp>
#include <opencv2/legacy/legacy.hpp>
#include "opencv2/ocl/ocl.hpp"
#include <CL/cl.h>

using namespace cv;
using namespace cv::ocl;
using namespace std;

// Apply the nearest-neighbour distance-ratio test: an entry survives only
// when two neighbours were found AND the best distance is at most 0.7 of
// the second-best distance. Rejected entries are cleared in place (later
// stages skip entries with fewer than 2 elements).
// Returns the number of entries that were cleared.
int ratioTest(std::vector<std::vector<cv::DMatch>>
    &matches) {
        int removed = 0;
        for (size_t i = 0; i < matches.size(); ++i) {
            std::vector<cv::DMatch>& candidates = matches[i];
            // Discard when fewer than 2 neighbours exist, or when the
            // best match is not clearly better than the runner-up.
            bool discard = candidates.size() <= 1 ||
                candidates[0].distance / candidates[1].distance > 0.7;
            if (discard) {
                candidates.clear(); // remove match
                ++removed;
            }
        }
        return removed;
}

// Validate matches against the epipolar geometry: estimate the fundamental
// matrix with RANSAC and copy only the inlier matches into outMatches.
//
// matches     candidate matches (image 1 -> image 2)
// keypoints1  keypoints of the query (first) image, indexed by queryIdx
// keypoints2  keypoints of the train (second) image, indexed by trainIdx
// outMatches  receives the surviving inlier matches (appended)
// returns     the estimated fundamental matrix, or an empty Mat when there
//             are no input matches
cv::Mat ransacTest(
    const std::vector<cv::DMatch>& matches,
    const std::vector<cv::KeyPoint>& keypoints1,
    const std::vector<cv::KeyPoint>& keypoints2,
    std::vector<cv::DMatch>& outMatches)
{
    // Convert the matched keypoints into two aligned Point2f lists.
    std::vector<cv::Point2f> points1, points2;
    for (std::vector<cv::DMatch>::const_iterator it = matches.begin();
        it != matches.end(); ++it) {
            points1.push_back(keypoints1[it->queryIdx].pt);
            points2.push_back(keypoints2[it->trainIdx].pt);
    }

    // findFundamentalMat fails on an empty point set, so bail out early
    // instead of crashing when all matches were filtered away upstream.
    if (points1.empty())
        return cv::Mat();

    // Compute the F matrix using RANSAC; inliers[i] != 0 marks match i
    // as consistent with the estimated epipolar geometry.
    std::vector<uchar> inliers(points1.size(), 0);
    cv::Mat fundemental = findFundamentalMat(
        cv::Mat(points1), cv::Mat(points2), // matching points
        inliers,      // match status (inlier or outlier)
        CV_FM_RANSAC, // RANSAC method
        3.0,          // distance to epipolar line
        0.99);        // confidence probability

    // Extract the surviving (inlier) matches.
    for (size_t i = 0; i < inliers.size(); ++i) {
        if (inliers[i]) { // it is a valid match
            outMatches.push_back(matches[i]);
        }
    }
    return fundemental;
}

// Keep only symmetric matches: a pair (q, t) from image1->image2 survives
// when the image2->image1 matching also maps t back to q. Surviving matches
// are appended to symMatches. Entries cleared by the ratio test (size < 2)
// are ignored on both sides.
void symmetryTest(
    const std::vector<std::vector<cv::DMatch>>& matches1,
    const std::vector<std::vector<cv::DMatch>>& matches2,
    std::vector<cv::DMatch>& symMatches) {
        typedef std::vector<std::vector<cv::DMatch>>::const_iterator MatchIter;
        // for all matches image 1 -> image 2
        for (MatchIter it1 = matches1.begin(); it1 != matches1.end(); ++it1) {
            if (it1->size() < 2) // ignore deleted matches
                continue;
            const cv::DMatch& forward = (*it1)[0];
            // for all matches image 2 -> image 1
            for (MatchIter it2 = matches2.begin(); it2 != matches2.end(); ++it2) {
                if (it2->size() < 2) // ignore deleted matches
                    continue;
                const cv::DMatch& backward = (*it2)[0];
                // Symmetric when each best match points back at the other.
                if (forward.queryIdx == backward.trainIdx &&
                    backward.queryIdx == forward.trainIdx) {
                        symMatches.push_back(cv::DMatch(
                            forward.queryIdx,
                            forward.trainIdx,
                            forward.distance));
                        break; // next match in image 1 -> image 2
                }
            }
        }
}

int main( int argc, char** argv ) {

    vector<ocl::Info> info;
    ocl::getDevice(info);

    // Load images
    Mat imgA = imread(argv[1], CV_LOAD_IMAGE_GRAYSCALE );
    if( !imgA.data ) {
        std::cout<< " --(!) Error reading image " << argv[1] << std::endl;
        return -1;
    }

    Mat imgB = imread(argv[2], CV_LOAD_IMAGE_GRAYSCALE );
    if( !imgA.data ) {
        std::cout << " --(!) Error reading image " << argv[2] << std::endl;
        return -1;
    }

    vector<cv::KeyPoint> keypoints1, keypoints2;
    ORB detector = ORB(6000, 1.2F, 4, 21, 0, 2, 0, 21);
    detector.detect(imgA,keypoints1);
    detector.detect(imgB,keypoints2);

    Mat descriptors1, descriptors2;
    FREAK extractor(true, true, 22.0F, 4);

    extractor.compute(imgA,keypoints1,descriptors1);
    extractor.compute(imgB,keypoints2,descriptors2);

    std::vector< cv::DMatch > totalmatches;

    cv::BruteForceMatcher<cv::Hamming> matcher1, matcher2;

    std::vector<cv::DMatch> symMatches;
    std::vector<std::vector<cv::DMatch>> matches1, matches2;
    std::vector<cv::DMatch> matches;

#define GPU

    double t = (double)cv::getTickCount();

#ifdef GPU
    BruteForceMatcher_OCL<ocl::Hamming> oclMatcher;
    oclMat left_ocl;
    oclMat right_ocl;

    left_ocl.upload(descriptors1);
    right_ocl.upload(descriptors2);

    oclMatcher.knnMatch(left_ocl,right_ocl, matches1,2);
    oclMatcher.knnMatch(right_ocl,left_ocl, matches2,2);
#endif

#ifndef GPU
    matcher1.knnMatch(descriptors1,descriptors2, matches1,2);
    matcher2.knnMatch(descriptors2,descriptors1, matches2,2);
#endif

    t = ((double)cv::getTickCount() - t)/cv::getTickFrequency();
    std::cout << "matching time [s]: " << t << std::endl;

    // 3. Remove matches for which NN ratio is 
    // > than threshold
    // clean image 1 -> image 2 matches
    ratioTest(matches1);

    // clean image 2 -> image 1 matches
    ratioTest(matches2);
    // 4. Remove non-symmetrical matches
    symmetryTest(matches1,matches2,symMatches);

    // 5. Validate matches using RANSAC
    cv::Mat fundemental= ransacTest(symMatches, 
        keypoints1, keypoints2, matches);
    Mat imgMatch;
    drawMatches(imgA, keypoints1, imgB, keypoints2, matches, imgMatch, Scalar::all(-1), Scalar::all(-1), vector<char>(), 0);

    cout << "Anz Matches " << matches.size() << endl;
    namedWindow("matches", CV_WINDOW_NORMAL);
    imshow("matches", imgMatch);
    waitKey();
    return 0;
}

OpenCL BruteForceMatcher slow and faulty

Hello. I want to match pictures that differ greatly from one another, and after lots of tests I found the combination ORB+FREAK to be the best for that. I need at least 4000 keypoints to get reliable results across the images.

The problem is the computation time. I want to cut down on it as much as possible and for that looked into OpenCVs OpenCL implementation (since CUDA only works for NVidia cards, it's not an option for me).

However, not only is the BruteForceMatcher slower than the CPU by a factor of 1.8, it also finds fewer matches. And that is something that shouldn't be possible.

My machine specs: Core 2 Duo E7300 @2,66GHz

Geforce 9500GT (not the fastest, but it shouldn't be this much slower!)

Windows XP 32Bit 4GB Ram (not all usable, but for this small application irrelevant) Visual Studio 2010 with compiled OpenCL module in release mode

The following is some example code. Parts of it are from the example in the OpenCV book. The important stuff happens in the main method. Pass the compiled exe two files and it will try to match them and outputs the result. To change between CPU/GPU, (un)comment the #define.

The images I used for comparison are first and second

The CPU version matches in 0.815s and finds 1530 matches.

The GPU version matches in 1.52s and finds 906 matches.

I am using OpenCV 2.4.3.

Why is the BruteForceMatcher_OCL not working correctly? Have others used it, and if yes, what are your results? Also, another bug I had: if I used more than 10000 keypoints, my application would crash with CL_OUT_OF_RESOURCES in initialization.cpp for the GPU version. How could the GPU use all of its memory on a few thousand keypoints? FREAK makes the descriptors so small that it shouldn't use much memory at all!

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/nonfree/features2d.hpp>
#include <opencv2/legacy/legacy.hpp>
#include "opencv2/ocl/ocl.hpp"
#include <CL/cl.h>

using namespace cv;
using namespace cv::ocl;
using namespace std;

// Apply the nearest-neighbour distance-ratio test: an entry survives only
// when two neighbours were found AND the best distance is at most 0.7 of
// the second-best distance. Rejected entries are cleared in place (later
// stages skip entries with fewer than 2 elements).
// Returns the number of entries that were cleared.
int ratioTest(std::vector<std::vector<cv::DMatch>>
    &matches) {
        int removed = 0;
        for (size_t i = 0; i < matches.size(); ++i) {
            std::vector<cv::DMatch>& candidates = matches[i];
            // Discard when fewer than 2 neighbours exist, or when the
            // best match is not clearly better than the runner-up.
            bool discard = candidates.size() <= 1 ||
                candidates[0].distance / candidates[1].distance > 0.7;
            if (discard) {
                candidates.clear(); // remove match
                ++removed;
            }
        }
        return removed;
}

// Validate matches against the epipolar geometry: estimate the fundamental
// matrix with RANSAC and copy only the inlier matches into outMatches.
//
// matches     candidate matches (image 1 -> image 2)
// keypoints1  keypoints of the query (first) image, indexed by queryIdx
// keypoints2  keypoints of the train (second) image, indexed by trainIdx
// outMatches  receives the surviving inlier matches (appended)
// returns     the estimated fundamental matrix, or an empty Mat when there
//             are no input matches
cv::Mat ransacTest(
    const std::vector<cv::DMatch>& matches,
    const std::vector<cv::KeyPoint>& keypoints1,
    const std::vector<cv::KeyPoint>& keypoints2,
    std::vector<cv::DMatch>& outMatches)
{
    // Convert the matched keypoints into two aligned Point2f lists.
    std::vector<cv::Point2f> points1, points2;
    for (std::vector<cv::DMatch>::const_iterator it = matches.begin();
        it != matches.end(); ++it) {
            points1.push_back(keypoints1[it->queryIdx].pt);
            points2.push_back(keypoints2[it->trainIdx].pt);
    }

    // findFundamentalMat fails on an empty point set, so bail out early
    // instead of crashing when all matches were filtered away upstream.
    if (points1.empty())
        return cv::Mat();

    // Compute the F matrix using RANSAC; inliers[i] != 0 marks match i
    // as consistent with the estimated epipolar geometry.
    std::vector<uchar> inliers(points1.size(), 0);
    cv::Mat fundemental = findFundamentalMat(
        cv::Mat(points1), cv::Mat(points2), // matching points
        inliers,      // match status (inlier or outlier)
        CV_FM_RANSAC, // RANSAC method
        3.0,          // distance to epipolar line
        0.99);        // confidence probability

    // Extract the surviving (inlier) matches.
    for (size_t i = 0; i < inliers.size(); ++i) {
        if (inliers[i]) { // it is a valid match
            outMatches.push_back(matches[i]);
        }
    }
    return fundemental;
}

// Keep only symmetric matches: a pair (q, t) from image1->image2 survives
// when the image2->image1 matching also maps t back to q. Surviving matches
// are appended to symMatches. Entries cleared by the ratio test (size < 2)
// are ignored on both sides.
void symmetryTest(
    const std::vector<std::vector<cv::DMatch>>& matches1,
    const std::vector<std::vector<cv::DMatch>>& matches2,
    std::vector<cv::DMatch>& symMatches) {
        typedef std::vector<std::vector<cv::DMatch>>::const_iterator MatchIter;
        // for all matches image 1 -> image 2
        for (MatchIter it1 = matches1.begin(); it1 != matches1.end(); ++it1) {
            if (it1->size() < 2) // ignore deleted matches
                continue;
            const cv::DMatch& forward = (*it1)[0];
            // for all matches image 2 -> image 1
            for (MatchIter it2 = matches2.begin(); it2 != matches2.end(); ++it2) {
                if (it2->size() < 2) // ignore deleted matches
                    continue;
                const cv::DMatch& backward = (*it2)[0];
                // Symmetric when each best match points back at the other.
                if (forward.queryIdx == backward.trainIdx &&
                    backward.queryIdx == forward.trainIdx) {
                        symMatches.push_back(cv::DMatch(
                            forward.queryIdx,
                            forward.trainIdx,
                            forward.distance));
                        break; // next match in image 1 -> image 2
                }
            }
        }
}

int main( int argc, char** argv ) {

    vector<ocl::Info> info;
    ocl::getDevice(info);

    // Load images
    Mat imgA = imread(argv[1], CV_LOAD_IMAGE_GRAYSCALE );
    if( !imgA.data ) {
        std::cout<< " --(!) Error reading image " << argv[1] << std::endl;
        return -1;
    }

    Mat imgB = imread(argv[2], CV_LOAD_IMAGE_GRAYSCALE );
    if( !imgA.data ) {
        std::cout << " --(!) Error reading image " << argv[2] << std::endl;
        return -1;
    }

    vector<cv::KeyPoint> keypoints1, keypoints2;
    ORB detector = ORB(6000, 1.2F, 4, 21, 0, 2, 0, 21);
    detector.detect(imgA,keypoints1);
    detector.detect(imgB,keypoints2);

    Mat descriptors1, descriptors2;
    FREAK extractor(true, true, 22.0F, 4);

    extractor.compute(imgA,keypoints1,descriptors1);
    extractor.compute(imgB,keypoints2,descriptors2);

    std::vector< cv::DMatch > totalmatches;

    cv::BruteForceMatcher<cv::Hamming> matcher1, matcher2;

    std::vector<cv::DMatch> symMatches;
    std::vector<std::vector<cv::DMatch>> matches1, matches2;
    std::vector<cv::DMatch> matches;

#define GPU

    double t = (double)cv::getTickCount();

#ifdef GPU
    BruteForceMatcher_OCL<ocl::Hamming> oclMatcher;
    oclMat left_ocl;
    oclMat right_ocl;

    left_ocl.upload(descriptors1);
    right_ocl.upload(descriptors2);

    oclMatcher.knnMatch(left_ocl,right_ocl, matches1,2);
    oclMatcher.knnMatch(right_ocl,left_ocl, matches2,2);
#endif

#ifndef GPU
    matcher1.knnMatch(descriptors1,descriptors2, matches1,2);
    matcher2.knnMatch(descriptors2,descriptors1, matches2,2);
#endif

    t = ((double)cv::getTickCount() - t)/cv::getTickFrequency();
    std::cout << "matching time [s]: " << t << std::endl;

    // 3. Remove matches for which NN ratio is 
    // > than threshold
    // clean image 1 -> image 2 matches
    ratioTest(matches1);

    // clean image 2 -> image 1 matches
    ratioTest(matches2);
    // 4. Remove non-symmetrical matches
    symmetryTest(matches1,matches2,symMatches);

    // 5. Validate matches using RANSAC
    cv::Mat fundemental= ransacTest(symMatches, 
        keypoints1, keypoints2, matches);
    Mat imgMatch;
    drawMatches(imgA, keypoints1, imgB, keypoints2, matches, imgMatch, Scalar::all(-1), Scalar::all(-1), vector<char>(), 0);

    cout << "Anz Matches " << matches.size() << endl;
    namedWindow("matches", CV_WINDOW_NORMAL);
    imshow("matches", imgMatch);
    waitKey();
    return 0;
}