I'm writing an iOS app that detects facial landmarks using ML Kit and then uses OpenCV's solvePnP to calculate the head pitch. For the PnP step I implemented the solution given here: https://www.learnopencv.com/head-pose-estimation-using-opencv-and-dlib/#code
That part seems to work well: the projected nose line it draws looks correct.
Next, I tried to convert the rotation vector to Euler angles, following this answer: http://answers.opencv.org/question/16796/computing-attituderoll-pitch-yaw-from-solvepnp/?answer=52913#post-id-52913
This is where it falls apart: the calculated yaw/pitch/roll are clearly wrong for my reference frame. Perhaps there is an issue converting between coordinate systems?
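For instance, OpenCV's solvePnP result lives in a camera frame with +x right, +y down, and +z pointing into the scene, while ML Kit's Euler angles presumably use a different convention. One idea I considered (just a sketch, not something I have verified) is re-expressing the rotation with a 180-degree flip about the camera's x axis before decomposing:

```
#include <opencv2/calib3d.hpp>

// Sketch only (NOT verified): re-express a solvePnP rotation in a frame
// where +y points up and +z points toward the viewer, by applying a
// 180-degree rotation about the camera's x axis.
static cv::Mat flipCameraFrame(const cv::Mat &rotation_vector) {
    cv::Mat R;
    cv::Rodrigues(rotation_vector, R);  // axis-angle -> 3x3 rotation matrix
    cv::Mat flipYZ = (cv::Mat_<double>(3, 3) <<
        1,  0,  0,
        0, -1,  0,
        0,  0, -1);
    return flipYZ * R;  // or should it be flipYZ * R * flipYZ.t()? unsure
}
```

But I'm not sure whether, or where, such a flip belongs in the pipeline below.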
Here is my code:
```
// Forward declaration: getEulerAngles is defined below estimatePose,
// so it must be declared before its first use in this Objective-C++ file.
static void getEulerAngles(const cv::Mat &rotationMatrix, cv::Vec3d &eulerAngles);

+ (NSArray *)estimatePose:(FIRVisionFace *)face imgSize:(CGSize)imgSize {
    // Contour legend: https://firebase.google.com/docs/ml-kit/images/examples/face_contours.svg
    FIRVisionFaceContour *faceOval = [face contourOfType:FIRFaceContourTypeFace];
    FIRVisionFaceContour *leftEyeContour = [face contourOfType:FIRFaceContourTypeLeftEye];
    FIRVisionFaceContour *rightEyeContour = [face contourOfType:FIRFaceContourTypeRightEye];
    FIRVisionFaceContour *noseBridge = [face contourOfType:FIRFaceContourTypeNoseBridge];
    FIRVisionFaceContour *upperLipTop = [face contourOfType:FIRFaceContourTypeUpperLipTop];

    FIRVisionPoint *chin = faceOval.points[18];
    FIRVisionPoint *leftEyeLeftCorner = leftEyeContour.points[0];
    FIRVisionPoint *rightEyeRightCorner = rightEyeContour.points[8];
    FIRVisionPoint *noseTip = noseBridge.points[1];
    FIRVisionPoint *leftMouthCorner = upperLipTop.points[0];
    FIRVisionPoint *rightMouthCorner = upperLipTop.points[10];

    // 2D image points paired with the generic 3D model points from
    // https://www.learnopencv.com/head-pose-estimation-using-opencv-and-dlib/#code
    std::vector<cv::Point2d> image_points;
    image_points.push_back(cv::Point2d(noseTip.x.doubleValue, noseTip.y.doubleValue));                         // Nose tip
    image_points.push_back(cv::Point2d(chin.x.doubleValue, chin.y.doubleValue));                               // Chin
    image_points.push_back(cv::Point2d(leftEyeLeftCorner.x.doubleValue, leftEyeLeftCorner.y.doubleValue));     // Left eye left corner
    image_points.push_back(cv::Point2d(rightEyeRightCorner.x.doubleValue, rightEyeRightCorner.y.doubleValue)); // Right eye right corner
    image_points.push_back(cv::Point2d(leftMouthCorner.x.doubleValue, leftMouthCorner.y.doubleValue));         // Left mouth corner
    image_points.push_back(cv::Point2d(rightMouthCorner.x.doubleValue, rightMouthCorner.y.doubleValue));       // Right mouth corner

    std::vector<cv::Point3d> model_points;
    model_points.push_back(cv::Point3d(0.0, 0.0, 0.0));          // Nose tip
    model_points.push_back(cv::Point3d(0.0, -330.0, -65.0));     // Chin
    model_points.push_back(cv::Point3d(-225.0, 170.0, -135.0));  // Left eye left corner
    model_points.push_back(cv::Point3d(225.0, 170.0, -135.0));   // Right eye right corner
    model_points.push_back(cv::Point3d(-150.0, -150.0, -125.0)); // Left mouth corner
    model_points.push_back(cv::Point3d(150.0, -150.0, -125.0));  // Right mouth corner

    // Approximate camera intrinsics: focal length ~ image width, principal
    // point at the image center, and no lens distortion.
    double focal_length = imgSize.width;
    cv::Point2d center = cv::Point2d(imgSize.width / 2, imgSize.height / 2);
    cv::Mat camera_matrix = (cv::Mat_<double>(3, 3) <<
        focal_length, 0,            center.x,
        0,            focal_length, center.y,
        0,            0,            1);
    cv::Mat dist_coeffs = cv::Mat::zeros(4, 1, cv::DataType<double>::type);

    // Solve for pose. The rotation comes back in axis-angle (Rodrigues) form.
    cv::Mat rotation_vector;
    cv::Mat translation_vector;
    cv::solvePnP(model_points, image_points, camera_matrix, dist_coeffs,
                 rotation_vector, translation_vector);

    // Project a 3D point 1000 units out along the nose axis to get the
    // second endpoint of the nose line.
    std::vector<cv::Point3d> nose_end_point3D;
    std::vector<cv::Point2d> nose_end_point2D;
    nose_end_point3D.push_back(cv::Point3d(0, 0, 1000.0));
    cv::projectPoints(nose_end_point3D, rotation_vector, translation_vector,
                      camera_matrix, dist_coeffs, nose_end_point2D);
    NSArray *noseLine = [NSArray arrayWithObjects:
        [NSValue valueWithCGPoint:CGPointMake(noseTip.x.doubleValue, noseTip.y.doubleValue)],
        [NSValue valueWithCGPoint:CGPointMake(nose_end_point2D[0].x, nose_end_point2D[0].y)],
        nil];

    // Convert the rotation vector to yaw/pitch/roll:
    // http://answers.opencv.org/question/16796/computing-attituderoll-pitch-yaw-from-solvepnp/?answer=52913#post-id-52913
    // cv::Rodrigues turns the 3x1 axis-angle vector into a 3x3 rotation matrix.
    cv::Mat rotation_matrix;
    cv::Rodrigues(rotation_vector, rotation_matrix);
    cv::Vec3d euler_angles;
    getEulerAngles(rotation_matrix, euler_angles);

    NSLog(@"mlkit yaw = %f, roll = %f", face.headEulerAngleY, face.headEulerAngleZ);
    NSLog(@"opencv yaw = %f, pitch = %f, roll = %f",
          euler_angles[1], euler_angles[0], euler_angles[2]);
    return noseLine;
}

// Pad the 3x3 rotation matrix into a 3x4 projection matrix and let OpenCV
// decompose it; the last output argument receives the Euler angles in degrees.
static void getEulerAngles(const cv::Mat &rotationMatrix, cv::Vec3d &eulerAngles) {
    cv::Mat cameraMatrix, rotMatrix, transVect, rotMatrixX, rotMatrixY, rotMatrixZ;
    const double *r = rotationMatrix.ptr<double>();
    double projMatrix[12] = {
        r[0], r[1], r[2], 0,
        r[3], r[4], r[5], 0,
        r[6], r[7], r[8], 0
    };
    cv::decomposeProjectionMatrix(cv::Mat(3, 4, CV_64FC1, projMatrix),
                                  cameraMatrix, rotMatrix, transVect,
                                  rotMatrixX, rotMatrixY, rotMatrixZ,
                                  eulerAngles);
}
```
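Since I wasn't sure about the angle convention decomposeProjectionMatrix uses, I also sketched a manual decomposition with the standard atan2 formulas (x-y-z order, results in degrees) to compare against it. Treat this as a sanity check rather than a fix:

```
#include <opencv2/core.hpp>
#include <cmath>

// Sanity-check sketch: decompose a 3x3 rotation matrix into x/y/z Euler
// angles (degrees) with the standard atan2 formulas, to compare against
// what decomposeProjectionMatrix reports.
static cv::Vec3d eulerFromRotationMatrix(const cv::Mat &R) {
    double sy = std::sqrt(R.at<double>(0, 0) * R.at<double>(0, 0) +
                          R.at<double>(1, 0) * R.at<double>(1, 0));
    double x, y, z;
    if (sy > 1e-6) {
        x = std::atan2(R.at<double>(2, 1), R.at<double>(2, 2));
        y = std::atan2(-R.at<double>(2, 0), sy);
        z = std::atan2(R.at<double>(1, 0), R.at<double>(0, 0));
    } else {
        // Gimbal lock: y is near +/-90 degrees, x and z are coupled.
        x = std::atan2(-R.at<double>(1, 2), R.at<double>(1, 1));
        y = std::atan2(-R.at<double>(2, 0), sy);
        z = 0;
    }
    const double toDeg = 180.0 / CV_PI;
    return cv::Vec3d(x * toDeg, y * toDeg, z * toDeg);
}
```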
For example, when I face straight at the camera, I get:

```
mlkit  yaw = 3.786244,  roll = 3.352636
opencv yaw = -1.416621, pitch = -179.549207, roll = -5.026994
```
And when I face left (head roughly level, so pitch close to flat), I get:

```
mlkit  yaw = -19.004604, roll = 4.542935
opencv yaw = -65.307372, pitch = -6.605039, roll = -57.922035
```
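One pattern I notice: facing straight on, the OpenCV pitch is -179.5, which sits right next to +/-180 rather than next to 0. I experimented with folding pitch back toward zero (sketch below), but I can't tell whether that's principled or just masking the real coordinate-system problem, and it doesn't explain the roll of -57.9 in the second case:

```
// Experiment (possibly just masking the real issue): fold pitch values
// reported near +/-180 degrees back toward 0, e.g. -179.549 -> -0.451.
static double foldPitchDegrees(double pitchDeg) {
    if (pitchDeg > 90.0)  return 180.0 - pitchDeg;
    if (pitchDeg < -90.0) return -(180.0 + pitchDeg);
    return pitchDeg;
}
```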
What am I doing wrong?