diff --git a/main.py b/main.py index bd58516..b2f4e41 100644 --- a/main.py +++ b/main.py @@ -2,28 +2,30 @@ import cv2 import argparse import matplotlib.pyplot as plt -import ipdb from stereoVO.configs import yaml_parser from stereoVO.datasets import KittiDataset from stereoVO.utils import rmse_error, draw_trajectory_2D, draw_trajectory_3D, draw_rmse_error from stereoVO.model import StereoVO + def parse_argument(): - - parser=argparse.ArgumentParser() + + parser = argparse.ArgumentParser() parser.add_argument('--config_path', default='configs/params.yaml') return parser.parse_args() + def draw_trajectory(traj, frame_id, x, y, z, draw_x, draw_y, true_x, true_y): - - cv2.circle(traj, (draw_x,draw_y), 1, (frame_id*255/4540,255-frame_id*255/4540,0), 1) - cv2.circle(traj, (true_x,true_y), 1, (0,0,255), 2) - cv2.rectangle(traj, (10, 20), (600, 60), (0,0,0), -1) - text = "Coordinates: x=%2fm y=%2fm z=%2fm"%(x,y,z) - cv2.putText(traj, text, (20,40), cv2.FONT_HERSHEY_PLAIN, 1, (255,255,255), 1, 8) + + cv2.circle(traj, (draw_x, draw_y), 1, (frame_id*255/4540, 255-frame_id*255/4540, 0), 1) + cv2.circle(traj, (true_x, true_y), 1, (0, 0, 255), 2) + cv2.rectangle(traj, (10, 20), (600, 60), (0, 0, 0), -1) + text = "Coordinates: x=%2fm y=%2fm z=%2fm"%(x, y, z) + cv2.putText(traj, text, (20, 40), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1, 8) cv2.imshow('Trajectory', traj) + def main(): args = parse_argument() diff --git a/setup/setup.py b/setup.py similarity index 100% rename from setup/setup.py rename to setup.py diff --git a/stereoVO/model/drivers.py b/stereoVO/model/drivers.py new file mode 100644 index 0000000..488540a --- /dev/null +++ b/stereoVO/model/drivers.py @@ -0,0 +1,145 @@ +""" +Author : Saksham Jindal +Date : January 15, 2020 +""" + +import cv2 +import numpy as np +from scipy.optimize import least_squares + +from stereoVO.optimization import get_minimization +from stereoVO.geometry import (DetectionEngine, + TrackingEngine, + filter_matching_inliers, + triangulate_points, + filter_triangulated_points) + + +class StereoDrivers(): + + """ + Base class for all driver code for running the engines + """ + + # To Do : Initialise __init__ module and global variables to be used in the subclass + + def _do_optimization(self, r_mat, t_vec): + + """ + To Do : Add docstring here + """ + + # Convert the matrix from world coordinates(prevState) to camera coordinates (currState) + t_vec = -r_mat.T @ t_vec + r_mat = r_mat.T + r_vec, _ = cv2.Rodrigues(r_mat) + + # Prepare an initial set of parameters to the optimizer + doF = np.concatenate((r_vec, t_vec)).flatten() + + # Prepare the solver for minimization + optRes = least_squares(get_minimization, doF, method='lm', max_nfev=2000, + args=(self.prevState.P3P_pts3D, + self.currState.pointsTracked.left, + self.currState.pointsTracked.right, + self.PL,self.PR)) + + # r_vec and t_vec obtained are in camera coordinate frames (currState) + # we need to convert these matrix to world coordinates system (prevState) + opt_rvec_cam = (optRes.x[:3]).reshape(-1,1) + opt_tvec_cam = (optRes.x[3:]).reshape(-1,1) + opt_rmat_cam, _ = cv2.Rodrigues(opt_rvec_cam) + + # Obtain the pose of the camera (wrt state of the previous camera) + r_mat = opt_rmat_cam.T + t_vec = -opt_rmat_cam.T @ opt_tvec_cam + + return r_mat, t_vec + + def _solve_pnp(self): + + """ + To Do : Add docstring here + """ + + args_pnpSolver = self.params.geometry.pnpSolver + + for i in range(args_pnpSolver.numTrials): + + _, r_vec, t_vec, idxPose = cv2.solvePnPRansac(self.prevState.pts3D_Tracking, + 
self.currState.pointsTracked.left, + self.intrinsic, + None, + iterationsCount=args_pnpSolver.numTrials, + reprojectionError=args_pnpSolver.reprojectionError, + confidence=args_pnpSolver.confidence, + flags=cv2.SOLVEPNP_P3P) + + + r_mat, _ = cv2.Rodrigues(r_vec) + + # r_vec and t_vec obtained are in camera coordinate frames + # we need to convert these matrices in world coordinates system + # or we need to transforms the matrix from currState to prevState + t_vec = -r_mat.T @ t_vec + r_mat = r_mat.T + + idxPose = idxPose.flatten() + + # ratio = len(idxPose)/len(self.prevState.pts3D_Tracking) + # scale = np.linalg.norm(t_vec) + + # if scale < args_pnpSolver.deltaT and ratio > args_pnpSolver.minRatio: + # print("Scale of translation of camera : {}".format(scale)) + # print("Solution obtained in P3P Iteration : {}".format(i+1)) + # print("Ratio of Inliers : {}".format(ratio)) + # break + # else: + # print("Warning : Max Iter : {} reached, still large position delta produced".format(i)) + + self.currState.pointsTracked = (self.currState.pointsTracked.left[idxPose], self.currState.pointsTracked.right[idxPose]) + self.prevState.P3P_pts3D = self.prevState.pts3D_Tracking[idxPose] + + return r_mat, t_vec + + def _process_feature_tracking(self): + + """ + To Do : Add docstring here + """ + + prevFrames = self.prevState.frames + currFrames = self.currState.frames + prevInliers = self.prevState.InliersFilter + + # Feature Tracking from prev state to current state + tracker = TrackingEngine(prevFrames, currFrames, prevInliers, self.intrinsic, self.params) + tracker.process_tracked_features() + self.prevState.inliersTracking, self.currState.pointsTracked, self.prevState.pts3D_Tracking = tracker.filter_inliers(self.prevState.pts3D_Filter) + + def _update_stereo_state(self, stereoState): + + """ + To Do : Add docstring here + """ + + # Detection Engine, Matching and Triangulation for first frame + detection_engine = DetectionEngine(stereoState.frames.left, stereoState.frames.right, self.params) + + stereoState.matchedPoints, stereoState.keyPoints, stereoState.descriptors = detection_engine.get_matching_keypoints() + + stereoState.inliers, _ = filter_matching_inliers(stereoState.matchedPoints.left, + stereoState.matchedPoints.right, + self.intrinsic, + self.params) + + stereoState.pts3D, reproj_error = triangulate_points(stereoState.inliers.left, + stereoState.inliers.right, + self.PL, + self.PR) + + args_triangulation = self.params.geometry.triangulation + stereoState.pts3D_Filter, maskTriangulationFilter, ratioFilter = filter_triangulated_points(stereoState.pts3D, reproj_error, **args_triangulation) + + stereoState.InliersFilter = stereoState.inliers.left[maskTriangulationFilter], stereoState.inliers.right[maskTriangulationFilter] + stereoState.ratioTriangulationFilter = ratioFilter \ No newline at end of file diff --git a/stereoVO/model/stereoVO.py b/stereoVO/model/stereoVO.py index 689a11c..cb016e3 100644 --- a/stereoVO/model/stereoVO.py +++ b/stereoVO/model/stereoVO.py @@ -1,25 +1,24 @@ -import numpy as np -import cv2 -from scipy.optimize import least_squares +from stereoVO.structures import VO_StateMachine +from stereoVO.model.drivers import StereoDrivers -from stereoVO.structures import StateBolts, VO_StateMachine -from stereoVO.optimization import get_minimization -from stereoVO.geometry import (DetectionEngine, - TrackingEngine, - filter_matching_inliers, - triangulate_points, - filter_triangulated_points) +class StereoVO(StereoDrivers): -import ipdb - - -class StereoVO(): + """ + StereoVo : is 
the main driver class that calls the DetectionEngine, TrackingEngine, PnP solver and optimizer
+    to calculate the relative location and orientation of a stereo state at a particular time instant
+    """

     def __init__(self, intrinsic, PL, PR, params):

         """
-        To Do : Add docstring here
+        :param intrinsic (np.array): size (3x3) camera calibration parameters
+        :param PL (np.array): size (3x4) left projection matrix such that x_L = PL * X_w
+        :param PR (np.array): size (3x4) right projection matrix such that x_R = PR * X_w
+                              (where world coordinates are in the frame of the left camera)
+        :param params (AttriDict): contains parameters for the stereo configuration,
+                                   detection of features, tracking and other geometric
+                                   computer vision features
         """

         self.intrinsic = intrinsic
@@ -30,194 +29,114 @@ def __init__(self, intrinsic, PL, PR, params):

     def __call__(self, left_frame, right_frame, state_num):

         """
-        To Do : Add docstring here
-        """
+        Called on the StereoVO object to process the incoming stereo frames and update the stereo state

-        if state_num == 0:
-            # Initialise the initial stereo state
-            self.prevState = VO_StateMachine(state_num)
-            self.prevState.frames = left_frame, right_frame
+        :param left_frame (np.array): of size (HxWx3) of stereo configuration
+        :param right_frame (np.array): of size (HxWx3) of stereo configuration
+        :param state_num (int): counter specifying the frame number of the stereo state

-            # Update the initial stereo state with detection and triangualation
-            self._update_stereo_state(self.prevState)
-
-            # Initialize the pose of the camera
-            self.prevState.location = np.array(self.params.initial.location)
-            self.prevState.orientation = np.array(self.params.initial.orientation)
+        Returns
+            VO_StateMachine::location (np.array): size (3) - translation of stereo state (left camera) relative to initial location
+            VO_StateMachine::orientation (np.array): size (3x3) - rotation of stereo state (left camera) relative to initial orientation
+        """

+        if state_num == 0:
+            self._process_first_frame(left_frame, right_frame, state_num)
             return self.prevState.location, self.prevState.orientation

-        if state_num == 1:
-            # Initialise the current stereo state
-            self.currState = VO_StateMachine(state_num)
-            self.currState.frames = left_frame, right_frame
-
-            # Update the initial stereo state with detection and triangualation
-            self._update_stereo_state(self.currState)
-
-            # Feature Tracking from prevState to currState
-            self._process_feature_tracking()
-
-            # P3P Solver
-            # obtains the pose of the camera in coordinate frame of prevState
-            r_mat, t_vec = self.solve_pnp()
-
-            # if optimisation is enabled
-            # do pose updation with the optimizer
-            if self.params.geometry.lsqsolver.enable:
-                r_mat, t_vec = self._do_optimization(r_mat, t_vec)
-
-            # Upating the pose of the camera of currState
-            # C_n = C_n-1 * dT_n-1; where dT_n-1 is in the
-            # reference of coordinate system of the second camera
-            self.currState.orientation = self.prevState.orientation @ r_mat
-            self.currState.location = self.prevState.orientation @ t_vec + self.prevState.location.reshape(-1,1)
-            self.currState.location = self.currState.location.flatten()
-
-            self.currState.keypoints = self.currState.pointsTracked
-            self.currState.landmarks = self.prevState.P3P_pts3D
-
+        elif state_num == 1:
+            self._process_second_frame(left_frame, right_frame, state_num)
         else:
-            self.currState = VO_StateMachine(state_num)
-            self.currState.frames = left_frame, right_frame
+            self._process_continuous_frame(left_frame, right_frame, state_num)

-            self._process_feature_tracking()
+        # To Do : Add logger object instead
+        print("Frame {} Processing Done ...................".format(state_num + 1))
+        print("Current Location : X : {x}, Y = {y}, Z = {z}".format(x=self.currState.location[0],
+                                                                     y=self.currState.location[1],
+                                                                     z=self.currState.location[2]))

-        r_mat, t_vec = self.solve_pnp()
+        self.prevState = self.currState

-        if self.params.geometry.lsqsolver.enable:
-            r_mat, t_vec = self._do_optimization(r_mat, t_vec)
+        return self.currState.location, self.currState.orientation

-        self.currState.orientation = self.prevState.orientation @ r_mat
-        self.currState.location = self.prevState.orientation @ t_vec + self.prevState.location.reshape(-1,1)
-        self.currState.location = self.currState.location.flatten()
+    def _process_first_frame(self, left_frame, right_frame, state_num):

-        self._update_stereo_state(self.currState)
+        """
+        Processes the first frame of the stereo state
+        """

-        print("Frame {} Processing Done ....".format(state_num + 1))
-        print("Current Location : X : {x}, Y = {y}, Z = {z}".format(x = self.currState.location[0],
-                                                                     y = self.currState.location[1],
-                                                                     z = self.currState.location[2]))
+        # Initialise the initial stereo state
+        self.prevState = VO_StateMachine(state_num)
+        self.prevState.frames = left_frame, right_frame

-        self.prevState = self.currState
+        # Update the initial stereo state with detection and triangulation
+        self._update_stereo_state(self.prevState)

-        return self.currState.location, self.currState.orientation
+        # Initialize the pose of the camera
+        self.prevState.location = np.array(self.params.initial.location)
+        self.prevState.orientation = np.array(self.params.initial.orientation)

-    def _do_optimization(self, r_mat, t_vec):
+    def _process_second_frame(self, left_frame, right_frame, state_num):

         """
-        To Do : Add docstring here
+        Processes the second frame of the stereo state
         """

-        # Convert the matrix from world coordinates(prevState) to camera coordinates (currState)
-        t_vec = -r_mat.T @ t_vec
-        r_mat = r_mat.T
-        r_vec, _ = cv2.Rodrigues(r_mat)
-
-        # Prepare an initial set of parameters to the optimizer
-        doF = np.concatenate((r_vec, t_vec)).flatten()
-
-        # Prepare the solver for minimization
-        optRes = least_squares(get_minimization, doF, method='lm', max_nfev=2000,
-                               args=(self.prevState.P3P_pts3D, self.currState.pointsTracked.left,
-                                     self.currState.pointsTracked.right, self.PL,self.PR))
-
-        # r_vec and t_vec obtained are in camera coordinate frames (currState)
-        # we need to convert these matrix to world coordinates system (prevState)
-        opt_rvec_cam = (optRes.x[:3]).reshape(-1,1)
-        opt_tvec_cam = (optRes.x[3:]).reshape(-1,1)
-        opt_rmat_cam,_ = cv2.Rodrigues(opt_rvec_cam)
-
-        # Obtain the pose of the camera (wrt state of the previous camera)
-        r_mat = opt_rmat_cam.T
-        t_vec = -opt_rmat_cam.T @ opt_tvec_cam
-
-        return r_mat, t_vec
-
-    def solve_pnp(self):

-        """
-        To Do : Add docstring here
-        """
+        # Initialise the current stereo state
+        self.currState = VO_StateMachine(state_num)
+        self.currState.frames = left_frame, right_frame

-        args_pnpSolver = self.params.geometry.pnpSolver
-
-        for i in range(args_pnpSolver.numTrials):
-
-            _, r_vec, t_vec, idxPose = cv2.solvePnPRansac(self.prevState.pts3D_Tracking,
-                                                          self.currState.pointsTracked.left,
-                                                          self.intrinsic,
-                                                          None,
-                                                          iterationsCount=args_pnpSolver.numTrials,
-                                                          reprojectionError=args_pnpSolver.reprojectionError,
-                                                          confidence=args_pnpSolver.confidence,
-                                                          flags=cv2.SOLVEPNP_P3P)
-
-
-            r_mat, _ = cv2.Rodrigues(r_vec)
-
-            # r_vec and t_vec obtained are in camera coordinate frames
-            # we need to convert these matrices in world coordinates system
-            # or we need to transforms the matrix from currState to prevState
-            t_vec = -r_mat.T @ t_vec
-            r_mat = r_mat.T
-
-            idxPose = idxPose.flatten()
-
-            ratio = len(idxPose)/len(self.prevState.pts3D_Tracking)
-            scale = np.linalg.norm(t_vec)
-
-            # if scale < args_pnpSolver.deltaT and ratio > args_pnpSolver.minRatio:
-            #     print("Scale of translation of camera : {}".format(scale))
-            #     print("Solution obtained in P3P Iteration : {}".format(i+1))
-            #     print("Ratio of Inliers : {}".format(ratio))
-            #     break
-            # else:
-            #     print("Warning : Max Iter : {} reached, still large position delta produced".format(i))
-
-        self.currState.pointsTracked = (self.currState.pointsTracked.left[idxPose], self.currState.pointsTracked.right[idxPose])
-        self.prevState.P3P_pts3D = self.prevState.pts3D_Tracking[idxPose]
-
-        return r_mat, t_vec
-
-    def _process_feature_tracking(self):
+        # Update the initial stereo state with detection and triangulation
+        self._update_stereo_state(self.currState)

-        """
-        To Do : Add docstring here
-        """
-
-        prevFrames = self.prevState.frames
-        currFrames = self.currState.frames
-        prevInliers = self.prevState.InliersFilter
+        # Feature Tracking from prevState to currState
+        self._process_feature_tracking()
+
+        # P3P Solver
+        # obtains the pose of the camera in coordinate frame of prevState
+        r_mat, t_vec = self._solve_pnp()
+
+        # if optimisation is enabled
+        # do pose updation with the optimizer
+        if self.params.geometry.lsqsolver.enable:
+            r_mat, t_vec = self._do_optimization(r_mat, t_vec)

-        # Feature Tracking from prev state to current state
-        tracker = TrackingEngine(prevFrames, currFrames, prevInliers, self.intrinsic, self.params)
-        tracker.process_tracked_features()
-        self.prevState.inliersTracking, self.currState.pointsTracked, self.prevState.pts3D_Tracking = tracker.filter_inliers(self.prevState.pts3D_Filter)
+        # Updating the pose of the camera of currState
+        # C_n = C_n-1 * dT_n-1; where dT_n-1 is in the
+        # reference of coordinate system of the second camera
+        self.currState.orientation = self.prevState.orientation @ r_mat
+        self.currState.location = (self.prevState.orientation @ t_vec +
+                                    self.prevState.location.reshape(-1, 1))
+        self.currState.location = self.currState.location.flatten()

-    def _update_stereo_state(self, stereoState):
+    def _process_continuous_frame(self, left_frame, right_frame, state_num):

         """
-        To Do : Add docstring here
+        Processes the stereo state for frames after the initial processing of the first 2 frames
         """

-        # Detection Engine, Matching and Triangulation for first frame
-        detection_engine = DetectionEngine(stereoState.frames.left, stereoState.frames.right, self.params)
+        # Initialise the current stereo state
+        self.currState = VO_StateMachine(state_num)
+        self.currState.frames = left_frame, right_frame

-        stereoState.matchedPoints, stereoState.keyPoints, stereoState.descriptors = detection_engine.get_matching_keypoints()
+        # Feature Tracking from prevState to currState
+        self._process_feature_tracking()

-        stereoState.inliers, _ = filter_matching_inliers(stereoState.matchedPoints.left,
-                                                         stereoState.matchedPoints.right,
-                                                         self.intrinsic,
-                                                         self.params)
+        # P3P Solver
+        # obtains the pose of the camera in coordinate frame of prevState
+        r_mat, t_vec = self._solve_pnp()

-        stereoState.pts3D, reproj_error = triangulate_points(stereoState.inliers.left,
-                                                             stereoState.inliers.right,
-                                                             self.PL,
-                                                             self.PR)
+        if self.params.geometry.lsqsolver.enable:
+            r_mat, t_vec = self._do_optimization(r_mat, t_vec)

-        args_triangulation = self.params.geometry.triangulation
-        stereoState.pts3D_Filter, maskTriangulationFilter, ratioFilter = filter_triangulated_points(stereoState.pts3D, reproj_error, **args_triangulation)
+        # Updating the pose of the camera of currState
+        # C_n = C_n-1 * dT_n-1; where dT_n-1 is in the
+        # reference of coordinate system of the second camera
+        self.currState.orientation = self.prevState.orientation @ r_mat
+        self.currState.location = (self.prevState.orientation @ t_vec +
+                                    self.prevState.location.reshape(-1, 1))
+        self.currState.location = self.currState.location.flatten()

-        stereoState.InliersFilter = stereoState.inliers.left[maskTriangulationFilter], stereoState.inliers.right[maskTriangulationFilter]
-        stereoState.ratioTriangulationFilter = ratioFilter
\ No newline at end of file
+        # Update the initial stereo state with detection and triangulation
+        self._update_stereo_state(self.currState)
\ No newline at end of file
diff --git a/stereoVO/structures/engines.py b/stereoVO/structures/engines.py
deleted file mode 100644
index 66348e4..0000000
--- a/stereoVO/structures/engines.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# import cv2
-
-# from .components import StateBolts
-# from ..geometry.feature_detectors import getMatchingKeypoints, showMatchedFeatures
-
-
-# class DetectionEngine():
-
-#     def __init__(self, frames, params):
-
-#         self.frames = frames
-#         self.params = params
-
-#         self.matchedPoints = StateBolts()
-#         self.keypoints = StateBolts()
-#         self.descriptors = StateBolts()
-#         self.inliers = StateBolts()
-
-#     def matching_keypoints(self):
-#         if self.matchedPoints is None:
-#             self.process_matching_keypoints()
-#         return self.matchedPoints, self.keypoints, self.descriptors
-
-#     def process_matching_keypoints(self):
-#         self.matchedPoints, self.keypoints, self.descriptors = getMatchingKeypoints(self.frames.left, self.frames.right, self.params)
-
-#     def show_matching_keypoints(self):
-#         showMatchedFeatures(self.left_frame, self.right_frame)
-
-#     def filter_matching_inliers(self):
-
-#         for i in range(self.params.GeoComp.EM.num_trials):
-#             E, mask = cv2.findEssentialMatrix(self.matchedPoints.left,
-#                                               self.matchedPoints.right, self.params)
-#             mask = mask.astype(bool)
-#             ratio = sum(mask)/len(mask.flatten())
-
-#             if ratio > self.params.EM.inilierRatio:
-#                 print("Iterations : 5 point Algorithm : {}".format(i+1))
-#                 print("Inlier Ratio : {}".format(ratio))
-#                 break
-#             else:
-#                 print("Failed to Calculate E, Iter : {}".format(i))
-#                 if i==self.params.GeoComp.EM.num_trials-1:
-#                     print("Maxinum interation in 5-point Algorithm reached")
-#                 else:
-#                     print("Running Iterations Again. Iters left : {}".format(self.params.geocom.EM.num_trials-1))
-
-#         self.inliers.left = self.matchedPoints.left(mask)
-#         self.inliers.right = self.matchedPoints.right(mask)
-
-#         return self.inliers.left, self.inliers.right
\ No newline at end of file
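Reviewer note (not part of the patch): the pose bookkeeping that now lives in StereoDrivers._solve_pnp / _do_optimization and the _process_*_frame methods — inverting the solvePnP result with r_mat.T and -r_mat.T @ t_vec, then chaining it onto the accumulated pose as C_n = C_n-1 * dT_n-1 — is easy to lose track of inside the diff. The standalone sketch below restates that math with plain NumPy; the function names are illustrative only and do not exist in the package.

import numpy as np

def invert_pose(r_mat, t_vec):
    # solvePnP returns (R, t) such that x_cam = R @ X_prev + t.
    # The camera pose expressed in the previous state's frame is therefore
    # orientation = R.T and position = -R.T @ t.
    return r_mat.T, -r_mat.T @ t_vec.reshape(3, 1)

def chain_pose(prev_orientation, prev_location, r_mat, t_vec):
    # C_n = C_n-1 * dT_n-1: compose the relative pose (already expressed in
    # the previous camera's frame) onto the accumulated world pose.
    orientation = prev_orientation @ r_mat
    location = prev_orientation @ t_vec + prev_location.reshape(3, 1)
    return orientation, location.flatten()

# Tiny self-check: a 90-degree yaw with a unit translation along z.
R = np.array([[0., -1., 0.],
              [1.,  0., 0.],
              [0.,  0., 1.]])
t = np.array([0., 0., 1.])
R_rel, t_rel = invert_pose(R, t)
orientation, location = chain_pose(np.eye(3), np.zeros(3), R_rel, t_rel)
print(orientation)   # R.T
print(location)      # [ 0.  0. -1.]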