Converting depth value to point cloud (#800)
* update fov config, add collect_sensors.py
* fix magic number
* camera mask for debugging drawer
* workable version
* revert camera fov
* add example
* delete debug example
* add test
* add point cloud to it
* update doc and readme
* fix simgen test
* remove check for duplicated sensor
* format
* do not test simgen

Co-authored-by: Alan <[email protected]>
1 parent 12b2bdc · commit 9273c2d
Showing 13 changed files with 307 additions and 92 deletions.
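In short, the commit adds a PointCloudLidar sensor that reuses the depth-camera pipeline to emit per-pixel (x, y, z) coordinates instead of RGB. A minimal usage sketch drawn from the example scripts added below; the sensor name, resolution, and environment here are just the values those examples use, not required defaults:

from metadrive.component.sensors.point_cloud_lidar import PointCloudLidar
from metadrive.envs.metadrive_env import MetaDriveEnv

# Minimal sketch based on the example scripts in this commit; values are illustrative.
env = MetaDriveEnv(
    {
        "image_observation": True,
        "vehicle_config": dict(image_source="point_cloud"),
        # (sensor class, width, height, ego_centric): a 200x64 "64-channel" lidar in the ego frame
        "sensors": dict(point_cloud=(PointCloudLidar, 200, 64, True)),
    }
)
o, _ = env.reset()
print(o["image"].shape)  # (64, 200, 3, stack_size): per-pixel (x, y, z) instead of RGB
env.close()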
metadrive/component/sensors/point_cloud_lidar.py (new file)
@@ -0,0 +1,95 @@
import numpy as np
from panda3d.core import Point3
from scipy.spatial.transform import Rotation as R

from metadrive.component.sensors.depth_camera import DepthCamera


class PointCloudLidar(DepthCamera):
    """
    This can be viewed as a special camera sensor whose RGB channels are the (x, y, z) world coordinates of the
    point cloud. It is therefore compatible with all image-related utilities; for example, you can use it with
    ImageObservation.
    """
    num_channels = 3  # x, y, z coordinates

    def __init__(self, width, height, ego_centric, engine, *, cuda=False):
        """
        If ego_centric is True, the point cloud will be in the camera's ego coordinate system.
        """
        if cuda:
            raise ValueError("LiDAR does not support CUDA acceleration for now. Ask for support if you need it.")
        super(PointCloudLidar, self).__init__(width, height, engine, cuda=False)
        self.ego_centric = ego_centric

    def get_rgb_array_cpu(self):
        """
        The result of this function is a 3D array of point cloud coordinates in shape (H, W, 3).
        The lens parameters can be changed on the fly!
        """
        lens = self.lens
        fov = lens.getFov()
        f_x = self.BUFFER_W / 2 / (np.tan(fov[0] / 2 / 180 * np.pi))
        f_y = self.BUFFER_H / 2 / (np.tan(fov[1] / 2 / 180 * np.pi))
        intrinsics = np.asarray([[f_x, 0, (self.BUFFER_H - 1) / 2], [0, f_y, (self.BUFFER_W - 1) / 2], [0, 0, 1]])
        f = lens.getFar()
        n = lens.getNear()

        depth = super(PointCloudLidar, self).get_rgb_array_cpu()
        hpr = self.cam.getHpr(self.engine.render)
        hpr[0] += 90  # Panda3D's y-axis is the camera facing direction, so rotate the heading by 90 degrees
        hpr[1] *= -1  # flip the pitch sign for the left-/right-handed conversion
        rot = R.from_euler('ZYX', hpr, degrees=True)
        rotation_matrix = rot.as_matrix()
        translation = Point3(0, 0, 0)
        if not self.ego_centric:
            translation = np.asarray(self.engine.render.get_relative_point(self.cam, Point3(0, 0, 0)))
        # Linearize the non-linear depth-buffer values into eye-space depth
        z_eye = 2 * n * f / ((f + n) - (2 * depth - 1) * (f - n))
        points = self.simulate_lidar_from_depth(z_eye.squeeze(-1), intrinsics, translation, rotation_matrix)
        return points

    @staticmethod
    def simulate_lidar_from_depth(depth_img, camera_intrinsics, camera_translation, camera_rotation):
        """
        Simulate LiDAR points in the world coordinate system from a depth image.

        Parameters:
            depth_img (np.ndarray): Depth image of shape (H, W) containing per-pixel eye-space depth values.
            camera_intrinsics (np.ndarray): Camera intrinsic matrix of shape (3, 3).
            camera_translation (np.ndarray): Translation vector of the camera in world coordinates of shape (3,).
            camera_rotation (np.ndarray): Rotation matrix of the camera in world coordinates of shape (3, 3).

        Returns:
            np.ndarray: LiDAR points in the world coordinate system of shape (H, W, 3), one 3D point per pixel.
        """
        # Bring the buffer into the expected orientation and get image dimensions
        depth_img = depth_img.T
        depth_img = depth_img[::-1, ::-1]
        height, width = depth_img.shape

        # Create a grid of pixel coordinates (u, v)
        u, v = np.meshgrid(np.arange(width), np.arange(height))
        uv_coords = np.stack([u, v, np.ones_like(u)], axis=-1)  # Shape: (H, W, 3)

        # Reshape to (H*W, 3) for easier matrix multiplication
        uv_coords = uv_coords.reshape(-1, 3)

        # Invert the camera intrinsic matrix to project pixels to camera coordinates
        K_inv = np.linalg.inv(camera_intrinsics)

        # Compute 3D points in the camera coordinate system
        cam_coords = (K_inv @ uv_coords.T).T  # Shape: (H*W, 3)
        cam_coords *= depth_img.reshape(-1)[..., None]  # Scale by depth

        # Remove invalid points (e.g., depth = 0)
        # valid_mask = depth_img.flatten() > 0
        # cam_coords = cam_coords[valid_mask]
        cam_coords = cam_coords[..., [2, 1, 0]]  # reorder axes to match the world coordinate convention

        # Transform points to the world coordinate system
        world_coords = (camera_rotation @ cam_coords.T).T + camera_translation

        # Restore the original (H, W, 3) layout
        world_coords = world_coords.reshape(height, width, 3)
        return world_coords.swapaxes(0, 1)
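For context, get_rgb_array_cpu combines two standard steps: it linearizes the non-linear depth-buffer value d in [0, 1] into eye-space depth with z_eye = 2nf / ((f + n) - (2d - 1)(f - n)), then back-projects every pixel through the inverse intrinsics. Below is a minimal self-contained sketch of the same math on a synthetic depth map, written in the conventional pinhole layout (the class above additionally transposes and flips the buffer to match Panda3D's conventions); the resolution, FOV, and near/far planes are illustrative, not the sensor's defaults.

import numpy as np

# Illustrative toy parameters (not the sensor's actual defaults)
W, H, fov_deg = 8, 4, 60.0
n, f = 0.1, 1000.0                        # near/far clipping planes
d = np.full((H, W), 0.6)                  # fake normalized depth buffer in [0, 1]

# 1) Linearize the depth buffer into eye-space depth
z_eye = 2 * n * f / ((f + n) - (2 * d - 1) * (f - n))

# 2) Back-project pixels through the inverse intrinsics
f_x = W / 2 / np.tan(np.radians(fov_deg) / 2)
K = np.array([[f_x, 0, (W - 1) / 2], [0, f_x, (H - 1) / 2], [0, 0, 1.0]])
u, v = np.meshgrid(np.arange(W), np.arange(H))
uv1 = np.stack([u, v, np.ones_like(u)], axis=-1).reshape(-1, 3)
cam_pts = (np.linalg.inv(K) @ uv1.T).T * z_eye.reshape(-1, 1)  # (H*W, 3) camera-frame points
print(cam_pts.shape)  # (32, 3)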
New example script: PointCloudLidar in ScenarioEnv with nuScenes data
@@ -0,0 +1,50 @@
#!/usr/bin/env python
"""
This script demonstrates how to use the environment where traffic and road map are loaded from the nuScenes dataset.
"""
import numpy as np
from panda3d.core import Point3

from metadrive.component.sensors.point_cloud_lidar import PointCloudLidar
from metadrive.constants import HELP_MESSAGE
from metadrive.engine.asset_loader import AssetLoader
from metadrive.envs.scenario_env import ScenarioEnv

RENDER_MESSAGE = {
    "Quit": "ESC",
    "Switch perspective": "Q or B",
    "Reset Episode": "R",
    "Keyboard Control": "W,A,S,D",
}

if __name__ == "__main__":
    asset_path = AssetLoader.asset_path
    print(HELP_MESSAGE)
    try:
        env = ScenarioEnv(
            {
                "manual_control": True,
                "sequential_seed": True,
                "reactive_traffic": False,
                "use_render": True,
                "image_observation": True,
                "vehicle_config": dict(image_source="point_cloud"),
                "sensors": dict(point_cloud=(PointCloudLidar, 200, 64, True)),  # 64-channel lidar
                "data_directory": AssetLoader.file_path(asset_path, "nuscenes", unix_style=False),
                "num_scenarios": 10
            }
        )
        o, _ = env.reset()

        cam = env.engine.get_sensor("point_cloud").cam
        drawer = env.engine.make_line_drawer()
        for i in range(1, 100000):
            o, r, tm, tc, info = env.step([1.0, 0.])
            # Offset the ego-centric points by the camera's world position to get world-frame points
            points = o["image"][..., :, -1] + np.asarray(env.engine.render.get_relative_point(cam, Point3(0, 0, 0)))

            drawer.reset()
            drawer.draw_lines(points)
            env.render(text=RENDER_MESSAGE)
            if tm or tc:
                env.reset()
    finally:
        env.close()
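One note on reading the observation in the loop above: ImageObservation stacks frames along the last axis, so o["image"][..., -1] (equivalently o["image"][..., :, -1]) is the most recent (H, W, 3) point map, and the camera's world position is added because the sensor was created with ego_centric=True. A small sketch of flattening that frame into a plain point list for downstream tools; the variable names are illustrative:

# Assumes `o` comes from env.step() in the loop above.
latest = o["image"][..., -1]           # (64, 200, 3): per-pixel (x, y, z) in the camera's ego frame
flat_points = latest.reshape(-1, 3)    # (12800, 3): flat point list for filtering or visualization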
New test: PointCloudLidar output shape
@@ -0,0 +1,64 @@
import pytest

from metadrive.component.sensors.point_cloud_lidar import PointCloudLidar
from metadrive.envs.metadrive_env import MetaDriveEnv

blackbox_test_configs = dict(
    # standard=dict(stack_size=3, width=256, height=128, norm_pixel=True),
    small=dict(stack_size=1, width=64, height=32, norm_pixel=False),
)


@pytest.mark.parametrize("config", list(blackbox_test_configs.values()), ids=list(blackbox_test_configs.keys()))
def test_point_cloud(config, render=False):
    """
    Test the output shape of the point cloud lidar. This does not verify the correctness of the rendered image;
    it only checks the shape of the image output and the image-retrieval pipeline.
    (Temporarily disabled on GitHub CI, which does not support compute shaders.)

    Args:
        config: test parameter
        render: render with cv2

    Returns: None
    """
    env = MetaDriveEnv(
        {
            "num_scenarios": 1,
            "traffic_density": 0.1,
            "show_terrain": False,
            "map": "S",
            "start_seed": 4,
            "stack_size": config["stack_size"],
            "vehicle_config": dict(image_source="camera"),
            "sensors": {
                "camera": (PointCloudLidar, config["width"], config["height"], True)
            },
            "interface_panel": ["dashboard", "camera"],
            "image_observation": True,  # it is a switch telling metadrive to use image as observation
            "norm_pixel": config["norm_pixel"],  # clip rgb to range(0,1) instead of (0, 255)
        }
    )
    try:
        env.reset()
        import cv2
        import time
        start = time.time()
        for i in range(1, 10):
            o, r, tm, tc, info = env.step([0, 1])
            assert env.observation_space.contains(o)
            assert o["image"].shape == (
                config["height"], config["width"], PointCloudLidar.num_channels, config["stack_size"]
            )
            if render:
                cv2.imshow('img', o["image"][..., -1])
                cv2.waitKey(1)
        print("FPS:", 10 / (time.time() - start))
    finally:
        env.close()


if __name__ == '__main__':
    test_point_cloud(config=blackbox_test_configs["small"], render=True)
New example script: PointCloudLidar in SafeMetaDriveEnv
@@ -0,0 +1,56 @@
from metadrive.component.sensors.point_cloud_lidar import PointCloudLidar
import numpy as np
from panda3d.core import Point3
from metadrive.envs.safe_metadrive_env import SafeMetaDriveEnv

if __name__ == "__main__":
    env = SafeMetaDriveEnv(
        {
            "num_scenarios": 1,
            "traffic_density": 0.,
            "accident_prob": 1.,
            "start_seed": 4,
            "map": "SSSSS",
            "manual_control": True,
            "image_observation": True,
            # "norm_pixel": True,
            "use_render": True,
            "debug": False,
            "interface_panel": ["point_cloud"],
            "sensors": dict(point_cloud=(PointCloudLidar, 200, 64, True)),  # 64-channel lidar
            "vehicle_config": dict(image_source="point_cloud"),
            # "map_config": {
            #     BaseMap.GENERATE_TYPE: MapGenerateMethod.BIG_BLOCK_NUM,
            #     BaseMap.GENERATE_CONFIG: 12,
            #     BaseMap.LANE_WIDTH: 3.5,
            #     BaseMap.LANE_NUM: 3,
            # }
        }
    )
    env.reset()
    drawer = env.engine.make_line_drawer()
    cam = env.engine.get_sensor("point_cloud").cam

    for i in range(1, 100000):
        o, r, tm, tc, info = env.step([0, 1])
        assert env.observation_space.contains(o)

        # Convert ego-centric points to world coordinates by adding the camera's world position
        points = o["image"][..., :, -1] + np.asarray(env.engine.render.get_relative_point(cam, Point3(0, 0, 0)))

        drawer.reset()
        drawer.draw_lines(points)
        # drawer.draw_points(points, colors=[(0, 0, 1, 1)] * len(points))
        if env.config["use_render"]:
            env.render()
        # if tm or tc:
        #     env.reset()
    env.close()