I'm writing an iOS app that detects facial landmarks using ML Kit and then uses OpenCV's solvePnP to calculate the head pitch. For the PnP step I implemented the solution given here: https://www.learnopencv.com/head-pose-estimation-using-opencv-and-dlib/#code
That part seems to work well: the projected nose line it draws looks correct.
Next, I tried to convert the rotation vector to Euler angles, following this answer: http://answers.opencv.org/question/16796/computing-attituderoll-pitch-yaw-from-solvepnp/?answer=52913#post-id-52913
This is where it falls apart: the calculated yaw/pitch/roll are clearly wrong for my reference frame. Perhaps there is an issue converting between coordinate systems?
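For instance, OpenCV's solvePnP result lives in a camera frame with +x right, +y down, and +z pointing into the scene, while ML Kit's Euler angles presumably use a different convention. One idea I considered (just a sketch, not something I have verified) is re-expressing the rotation with a 180-degree flip about the camera's x axis before decomposing:

```
#include <opencv2/calib3d.hpp>

// Sketch only (NOT verified): re-express a solvePnP rotation in a frame
// where +y points up and +z points toward the viewer, by applying a
// 180-degree rotation about the camera's x axis.
static cv::Mat flipCameraFrame(const cv::Mat &rotation_vector) {
    cv::Mat R;
    cv::Rodrigues(rotation_vector, R);  // axis-angle -> 3x3 rotation matrix
    cv::Mat flipYZ = (cv::Mat_<double>(3, 3) <<
        1,  0,  0,
        0, -1,  0,
        0,  0, -1);
    return flipYZ * R;  // or should it be flipYZ * R * flipYZ.t()? unsure
}
```

But I'm not sure whether, or where, such a flip belongs in the pipeline below.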
Here is my code:
```
// Forward declaration: getEulerAngles is defined below estimatePose,
// so it must be declared before its first use in this Objective-C++ file.
static void getEulerAngles(const cv::Mat &rotationMatrix, cv::Vec3d &eulerAngles);

+ (NSArray *)estimatePose:(FIRVisionFace *)face imgSize:(CGSize)imgSize {
    // Contour legend: https://firebase.google.com/docs/ml-kit/images/examples/face_contours.svg
    FIRVisionFaceContour *faceOval = [face contourOfType:FIRFaceContourTypeFace];
    FIRVisionFaceContour *leftEyeContour = [face contourOfType:FIRFaceContourTypeLeftEye];
    FIRVisionFaceContour *rightEyeContour = [face contourOfType:FIRFaceContourTypeRightEye];
    FIRVisionFaceContour *noseBridge = [face contourOfType:FIRFaceContourTypeNoseBridge];
    FIRVisionFaceContour *upperLipTop = [face contourOfType:FIRFaceContourTypeUpperLipTop];

    FIRVisionPoint *chin = faceOval.points[18];
    FIRVisionPoint *leftEyeLeftCorner = leftEyeContour.points[0];
    FIRVisionPoint *rightEyeRightCorner = rightEyeContour.points[8];
    FIRVisionPoint *noseTip = noseBridge.points[1];
    FIRVisionPoint *leftMouthCorner = upperLipTop.points[0];
    FIRVisionPoint *rightMouthCorner = upperLipTop.points[10];

    // 2D image points paired with the generic 3D model points from
    // https://www.learnopencv.com/head-pose-estimation-using-opencv-and-dlib/#code
    std::vector<cv::Point2d> image_points;
    image_points.push_back(cv::Point2d(noseTip.x.doubleValue, noseTip.y.doubleValue));                         // Nose tip
    image_points.push_back(cv::Point2d(chin.x.doubleValue, chin.y.doubleValue));                               // Chin
    image_points.push_back(cv::Point2d(leftEyeLeftCorner.x.doubleValue, leftEyeLeftCorner.y.doubleValue));     // Left eye left corner
    image_points.push_back(cv::Point2d(rightEyeRightCorner.x.doubleValue, rightEyeRightCorner.y.doubleValue)); // Right eye right corner
    image_points.push_back(cv::Point2d(leftMouthCorner.x.doubleValue, leftMouthCorner.y.doubleValue));         // Left mouth corner
    image_points.push_back(cv::Point2d(rightMouthCorner.x.doubleValue, rightMouthCorner.y.doubleValue));       // Right mouth corner

    std::vector<cv::Point3d> model_points;
    model_points.push_back(cv::Point3d(0.0, 0.0, 0.0));          // Nose tip
    model_points.push_back(cv::Point3d(0.0, -330.0, -65.0));     // Chin
    model_points.push_back(cv::Point3d(-225.0, 170.0, -135.0));  // Left eye left corner
    model_points.push_back(cv::Point3d(225.0, 170.0, -135.0));   // Right eye right corner
    model_points.push_back(cv::Point3d(-150.0, -150.0, -125.0)); // Left mouth corner
    model_points.push_back(cv::Point3d(150.0, -150.0, -125.0));  // Right mouth corner

    // Approximate camera intrinsics: focal length ~ image width, principal
    // point at the image center, and no lens distortion.
    double focal_length = imgSize.width;
    cv::Point2d center = cv::Point2d(imgSize.width / 2, imgSize.height / 2);
    cv::Mat camera_matrix = (cv::Mat_<double>(3, 3) <<
        focal_length, 0,            center.x,
        0,            focal_length, center.y,
        0,            0,            1);
    cv::Mat dist_coeffs = cv::Mat::zeros(4, 1, cv::DataType<double>::type);

    // Solve for pose. The rotation comes back in axis-angle (Rodrigues) form.
    cv::Mat rotation_vector;
    cv::Mat translation_vector;
    cv::solvePnP(model_points, image_points, camera_matrix, dist_coeffs,
                 rotation_vector, translation_vector);

    // Project a 3D point 1000 units out along the nose axis to get the
    // second endpoint of the nose line.
    std::vector<cv::Point3d> nose_end_point3D;
    std::vector<cv::Point2d> nose_end_point2D;
    nose_end_point3D.push_back(cv::Point3d(0, 0, 1000.0));
    cv::projectPoints(nose_end_point3D, rotation_vector, translation_vector,
                      camera_matrix, dist_coeffs, nose_end_point2D);
    NSArray *noseLine = [NSArray arrayWithObjects:
        [NSValue valueWithCGPoint:CGPointMake(noseTip.x.doubleValue, noseTip.y.doubleValue)],
        [NSValue valueWithCGPoint:CGPointMake(nose_end_point2D[0].x, nose_end_point2D[0].y)],
        nil];

    // Convert the rotation vector to yaw/pitch/roll:
    // http://answers.opencv.org/question/16796/computing-attituderoll-pitch-yaw-from-solvepnp/?answer=52913#post-id-52913
    // cv::Rodrigues turns the 3x1 axis-angle vector into a 3x3 rotation matrix.
    cv::Mat rotation_matrix;
    cv::Rodrigues(rotation_vector, rotation_matrix);
    cv::Vec3d euler_angles;
    getEulerAngles(rotation_matrix, euler_angles);

    NSLog(@"mlkit yaw = %f, roll = %f", face.headEulerAngleY, face.headEulerAngleZ);
    NSLog(@"opencv yaw = %f, pitch = %f, roll = %f",
          euler_angles[1], euler_angles[0], euler_angles[2]);
    return noseLine;
}

// Pad the 3x3 rotation matrix into a 3x4 projection matrix and let OpenCV
// decompose it; the last output argument receives the Euler angles in degrees.
static void getEulerAngles(const cv::Mat &rotationMatrix, cv::Vec3d &eulerAngles) {
    cv::Mat cameraMatrix, rotMatrix, transVect, rotMatrixX, rotMatrixY, rotMatrixZ;
    const double *r = rotationMatrix.ptr<double>();
    double projMatrix[12] = {
        r[0], r[1], r[2], 0,
        r[3], r[4], r[5], 0,
        r[6], r[7], r[8], 0
    };
    cv::decomposeProjectionMatrix(cv::Mat(3, 4, CV_64FC1, projMatrix),
                                  cameraMatrix, rotMatrix, transVect,
                                  rotMatrixX, rotMatrixY, rotMatrixZ,
                                  eulerAngles);
}
```
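Since I wasn't sure about the angle convention decomposeProjectionMatrix uses, I also sketched a manual decomposition with the standard atan2 formulas (x-y-z order, results in degrees) to compare against it. Treat this as a sanity check rather than a fix:

```
#include <opencv2/core.hpp>
#include <cmath>

// Sanity-check sketch: decompose a 3x3 rotation matrix into x/y/z Euler
// angles (degrees) with the standard atan2 formulas, to compare against
// what decomposeProjectionMatrix reports.
static cv::Vec3d eulerFromRotationMatrix(const cv::Mat &R) {
    double sy = std::sqrt(R.at<double>(0, 0) * R.at<double>(0, 0) +
                          R.at<double>(1, 0) * R.at<double>(1, 0));
    double x, y, z;
    if (sy > 1e-6) {
        x = std::atan2(R.at<double>(2, 1), R.at<double>(2, 2));
        y = std::atan2(-R.at<double>(2, 0), sy);
        z = std::atan2(R.at<double>(1, 0), R.at<double>(0, 0));
    } else {
        // Gimbal lock: y is near +/-90 degrees, x and z are coupled.
        x = std::atan2(-R.at<double>(1, 2), R.at<double>(1, 1));
        y = std::atan2(-R.at<double>(2, 0), sy);
        z = 0;
    }
    const double toDeg = 180.0 / CV_PI;
    return cv::Vec3d(x * toDeg, y * toDeg, z * toDeg);
}
```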
For example, when I face straight at the camera, I get:

```
mlkit  yaw = 3.786244,  roll = 3.352636
opencv yaw = -1.416621, pitch = -179.549207, roll = -5.026994
```
And when I face left (head roughly level, so pitch close to flat), I get:

```
mlkit  yaw = -19.004604, roll = 4.542935
opencv yaw = -65.307372, pitch = -6.605039, roll = -57.922035
```
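One pattern I notice: facing straight on, the OpenCV pitch is -179.5, which sits right next to +/-180 rather than next to 0. I experimented with folding pitch back toward zero (sketch below), but I can't tell whether that's principled or just masking the real coordinate-system problem, and it doesn't explain the roll of -57.9 in the second case:

```
// Experiment (possibly just masking the real issue): fold pitch values
// reported near +/-180 degrees back toward 0, e.g. -179.549 -> -0.451.
static double foldPitchDegrees(double pitchDeg) {
    if (pitchDeg > 90.0)  return 180.0 - pitchDeg;
    if (pitchDeg < -90.0) return -(180.0 + pitchDeg);
    return pitchDeg;
}
```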
What am I doing wrong?