
Could you share a Python version? #1

Open
tyutlk opened this issue Nov 17, 2020 · 2 comments

Comments

@tyutlk

tyutlk commented Nov 17, 2020

Hi, thanks for your amazing work -- I've understood the idea behind the Confusion.
But when I want to test this code, I can't get it working because I am poor at C++ and ROS.
So, could you share a Python version? Thanks!

@jhultman
Owner

Hi, sorry for the delayed response -- I missed your message. I will try to post a Python version shortly.

@jhultman
Owner

jhultman commented Dec 23, 2020

Hi, here's a simple Python version. I will push it to the master branch once I clean it up. It borrows some code from my other project, vision3d.

Unfortunately it's not as fast as I remembered. I think the KD-tree construction and queries are the bottleneck. You can look for a more efficient nearest-neighbor implementation if you have a performance-critical application.
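In case it helps, here is a rough sketch (hypothetical, not from this repo) of a pure-NumPy grid-binning rasterization that sidesteps the nearest-neighbor query entirely: each colored point is dropped into its BEV cell and per-cell colors are averaged with `np.bincount`. The function name and signature are my own invention for illustration.

```python
import numpy as np

def rasterize_bev(points_xy, rgb, lower, upper, pixel_size):
    """Average point colors into BEV grid cells using bincount (no KD-tree)."""
    nx, ny = ((upper - lower) / pixel_size).astype(int)
    # map each point to its integer cell index
    ij = np.floor((points_xy - lower) / pixel_size).astype(int)
    keep = (ij >= 0).all(1) & (ij[:, 0] < nx) & (ij[:, 1] < ny)
    flat = ij[keep, 0] * ny + ij[keep, 1]
    counts = np.bincount(flat, minlength=nx * ny)
    canvas = np.zeros((nx * ny, 3))
    for c in range(3):  # accumulate each color channel per cell
        canvas[:, c] = np.bincount(flat, weights=rgb[keep, c], minlength=nx * ny)
    canvas /= np.maximum(counts, 1)[:, None]  # mean color; empty cells stay zero
    return canvas.reshape(nx, ny, 3)
```

This runs in roughly O(N) with no tree build, at the cost of leaving empty cells black instead of borrowing nearby colors within a distance bound.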

# Copyright Jacob Hultman 2020 (MIT license)

import cv2
import numpy as np
import scipy.spatial
import matplotlib.pyplot as plt
from collections import namedtuple


keys = ['V2C', 'C2V', 'R0', 'P2', 'WH']
Calib = namedtuple('Calib', keys)


class CalibObject:
    """
    3d XYZ in <label>.txt are in rect camera coord.
    Points in <lidar>.bin are in Velodyne coord.
    """

    def __init__(self, calib_filepath):
        calibs = self.read_calib_file(calib_filepath)
        # Rigid transform from Velodyne to reference camera
        self.V2C = calibs["V2C"].reshape(3, 4)
        # Inverse of above
        self.C2V = self.inverse_rigid_trans(self.V2C)
        # Rotation from reference camera to rect camera
        self.R0 = calibs["R0"].reshape(3, 3)
        # Projection matrix from rect camera to image2
        self.P2 = calibs["P2"].reshape(3, 4)
        # Hardcoded image shape (approximate)
        self.WH = np.r_[1224, 370]
        # Serializable representation
        self.astuple = Calib(self.V2C, self.C2V, self.R0, self.P2, self.WH)

    def read_calib_file(self, filepath):
        with open(filepath) as f:
            lines = f.readlines()
        obj = lines[2].strip().split(" ")[1:]
        P2 = np.array(obj, dtype=np.float32)
        obj = lines[3].strip().split(" ")[1:]
        P3 = np.array(obj, dtype=np.float32)
        obj = lines[4].strip().split(" ")[1:]
        R0 = np.array(obj, dtype=np.float32)
        obj = lines[5].strip().split(" ")[1:]
        Tr_velo_to_cam = np.array(obj, dtype=np.float32)
        transforms = dict(
            P2=P2.reshape(3, 4),
            P3=P3.reshape(3, 4),
            R0=R0.reshape(3, 3),
            V2C=Tr_velo_to_cam.reshape(3, 4),
        )
        return transforms

    def inverse_rigid_trans(self, Tr):
        inv_Tr = np.zeros_like(Tr)  # 3x4
        inv_Tr[0:3, 0:3] = np.transpose(Tr[0:3, 0:3])
        inv_Tr[0:3, 3] = np.dot(-np.transpose(Tr[0:3, 0:3]), Tr[0:3, 3])
        return inv_Tr


def main():
    # load sample KITTI data
    idx = f'{5:06d}'
    points = np.fromfile(f'./sample/velodyne/{idx}.bin', dtype=np.float32).reshape(-1, 4)[:, :3]
    image = cv2.imread(f'./sample/image_2/{idx}.png')[..., ::-1]
    calib = CalibObject(f'./sample/calib/{idx}.txt').astuple

    # project lidar points to image plane
    ones = np.ones_like(points[:, 0:1])
    p = np.concatenate((points, ones), axis=1)
    p = np.linalg.multi_dot((calib.R0, calib.V2C, p.T))
    p = (calib.P2 @ np.concatenate((p, ones.T), axis=0)).T
    pixel_indices = np.round(p[:, :2] / p[:, 2:3]).astype(np.int32)
    keep_image = ((pixel_indices >= 0) & (pixel_indices < calib.WH)).all(1)

    # setup bird's eye view grid params
    lower = np.r_[5, -20]
    upper = np.r_[30, 20]
    pixel_size = np.r_[0.2, 0.2]
    Nx, Ny = ((upper - lower) / pixel_size).astype(np.int32)

    # build BEV image canvas and make meshgrid of query sites
    canvas = np.zeros((Nx, Ny, 3), dtype=np.float32)
    queries = np.mgrid[
        lower[0]:upper[0]:complex(Nx),
        lower[1]:upper[1]:complex(Ny)].transpose(1, 2, 0)

    # filter points within BEV bounds
    points_bev = points[:, :2]
    keep_bev = ((points_bev >= lower) & (points_bev < upper - pixel_size)).all(1)

    # carry out nearest neighbor query in BEV
    tree = scipy.spatial.KDTree(points_bev[keep_bev & keep_image])
    _, nbr_indices = tree.query(queries.reshape(-1, 2), k=3, distance_upper_bound=1)
    nbr_indices = nbr_indices.reshape(*queries.shape[:2], -1)

    # remove incomplete neighborhoods (SciPy marks missing neighbors with index tree.n, not -1)
    keep_nbrs = (nbr_indices < tree.n).all(-1)
    nbr_indices = nbr_indices[keep_nbrs]

    # fetch rgb pixels using computed image pixel coordinates (U, V)
    j, i = pixel_indices[keep_bev & keep_image].T
    pixel_rgb = image[i, j]

    # populate BEV image canvas
    canvas[keep_nbrs] = pixel_rgb[nbr_indices].mean(1)
    canvas = np.clip(canvas, 0, 255).astype(np.uint8)[::-1]
    
    # plot BEV image
    fig, ax = plt.subplots(figsize=(16, 12))
    ax.imshow(canvas)
    plt.show()


if __name__ == '__main__':
    main()
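One subtlety with the `distance_upper_bound` query used above: when no neighbor lies within the bound, SciPy returns the sentinel index `tree.n` (the number of points in the tree), not `-1`, so incomplete neighborhoods should be filtered by comparing against `tree.n`. A minimal demo:

```python
import numpy as np
import scipy.spatial

pts = np.array([[0.0, 0.0], [1.0, 0.0]])
tree = scipy.spatial.cKDTree(pts)

# Query a site far from every point with a distance cutoff: the "missing"
# neighbor comes back with distance inf and index tree.n, not -1.
dist, idx = tree.query([10.0, 10.0], k=1, distance_upper_bound=1.0)
assert np.isinf(dist) and idx == tree.n  # sentinel index == number of points
```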

Example output (KITTI frame 000005).
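As a quick sanity check on the `inverse_rigid_trans` helper in the script: inverting a 3x4 rigid transform `[R | t]` gives `[R^T | -R^T t]`, and composing the two should round-trip any point. A small self-contained sketch (the test rotation and translation are arbitrary values of my choosing):

```python
import numpy as np

def inverse_rigid_trans(Tr):
    """Invert a 3x4 rigid transform [R | t] -> [R^T | -R^T t]."""
    inv_Tr = np.zeros_like(Tr)
    inv_Tr[:3, :3] = Tr[:3, :3].T
    inv_Tr[:3, 3] = -Tr[:3, :3].T @ Tr[:3, 3]
    return inv_Tr

# a rotation about z plus a translation
theta = 0.3
R = np.array([[np.cos(theta), -np.sin(theta), 0],
              [np.sin(theta),  np.cos(theta), 0],
              [0, 0, 1]])
Tr = np.hstack([R, np.array([[1.0], [2.0], [3.0]])])
inv = inverse_rigid_trans(Tr)

p = np.array([4.0, 5.0, 6.0, 1.0])  # homogeneous point
q = np.r_[Tr @ p, 1.0]              # forward transform, re-homogenized
assert np.allclose(inv @ q, p[:3])  # round-trips to the original point
```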
