Converting depth value to point cloud (#800)
* update fov config
add collect_sensors.py

* fix magic number

* camera mask for debugging drawer

* workable version

* revert camera fov

* add example

* delete debug example

* add test

* add point cloud to it

* update doc and readme

* fix simgen test

* remove check for duplicated sensor

* format

* do not test simgen

---------

Co-authored-by: Alan <[email protected]>
QuanyiLi and Alan-LanFeng authored Jan 18, 2025
1 parent 12b2bdc commit 9273c2d
Showing 13 changed files with 307 additions and 92 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -36,7 +36,7 @@ MetaDrive is a driving simulator with the following key features:

- **Compositional**: It supports synthesising infinite scenes with various road maps and traffic settings or loading real-world driving logs for the research of generalizable RL.
- **Lightweight**: It is easy to install and run on Linux/Windows/MacOS with sensor simulation support. It can run up to +1000 FPS on a standard PC.
- **Realistic**: Accurate physics simulation and multiple sensory input including Lidar, RGB/Depth/Semantic images, top-down semantic map and first-person view images.
- **Realistic**: Accurate physics simulation and multiple sensory input including point cloud, RGB/Depth/Semantic images, top-down semantic map and first-person view images.


## 🛠 Quick Start
6 changes: 5 additions & 1 deletion documentation/source/sensors.ipynb
@@ -165,7 +165,11 @@
"* Main Camera\n",
"* RGB Camera\n",
"* Depth Camera\n",
"* Semantic Camera"
"* Semantic Camera\n",
"* Instance Camera\n",
"* Lidar (Cloud Point)\n",
"\n",
"The following example mainly uses the semantic camera, but the same method can be applied to other sensors including the point cloud."
]
},
{
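As the notebook change above notes, the point cloud sensor is configured with the same pattern as the cameras. A minimal sketch (not part of the diff, assuming offscreen rendering is available; width/height/ego_centric values mirror `metadrive/examples/point_cloud_lidar.py` from this commit):

```python
from metadrive.component.sensors.point_cloud_lidar import PointCloudLidar
from metadrive.envs.metadrive_env import MetaDriveEnv

env = MetaDriveEnv(
    dict(
        image_observation=True,
        sensors=dict(point_cloud=(PointCloudLidar, 200, 64, True)),  # width, height, ego_centric
        vehicle_config=dict(image_source="point_cloud"),
    )
)
o, _ = env.reset()
print(o["image"].shape)  # (height, width, 3, stack_size); each "pixel" is an (x, y, z) point
env.close()
```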
105 changes: 23 additions & 82 deletions documentation/source/simgen_render.ipynb

Large diffs are not rendered by default.

95 changes: 95 additions & 0 deletions metadrive/component/sensors/point_cloud_lidar.py
@@ -0,0 +1,95 @@
import numpy as np
from panda3d.core import Point3
from scipy.spatial.transform import Rotation as R

from metadrive.component.sensors.depth_camera import DepthCamera


class PointCloudLidar(DepthCamera):
"""
This can be viewed as a special camera sensor whose RGB channels are the (x, y, z) world coordinates of the point cloud.
Thus, it is compatible with all image-related utilities. For example, you can use it with ImageObservation.
"""
num_channels = 3 # x, y, z coordinates

def __init__(self, width, height, ego_centric, engine, *, cuda=False):
"""
If ego_centric is True, the point cloud will be in the camera's ego coordinate system.
"""
if cuda:
raise ValueError("LiDAR does not support CUDA acceleration for now. Ask for support if you need it.")
super(PointCloudLidar, self).__init__(width, height, engine, cuda=False)
self.ego_centric = ego_centric

def get_rgb_array_cpu(self):
"""
The result of this function is a 3D array of point cloud coordinates with shape (H, W, 3).
The lens parameters can be changed on the fly!
"""

lens = self.lens
fov = lens.getFov()
f_x = self.BUFFER_W / 2 / (np.tan(fov[0] / 2 / 180 * np.pi))
f_y = self.BUFFER_H / 2 / (np.tan(fov[1] / 2 / 180 * np.pi))
intrinsics = np.asarray([[f_x, 0, (self.BUFFER_H - 1) / 2], [0, f_y, (self.BUFFER_W - 1) / 2], [0, 0, 1]])
f = lens.getFar()
n = lens.getNear()

depth = super(PointCloudLidar, self).get_rgb_array_cpu()
hpr = self.cam.getHpr(self.engine.render)
hpr[0] += 90 # Panda3D's y axis is the camera facing direction, so rotate the heading by 90 degrees
hpr[1] *= -1 # convert between left- and right-handed conventions
rot = R.from_euler('ZYX', hpr, degrees=True)
rotation_matrix = rot.as_matrix()
translation = Point3(0, 0, 0)
if not self.ego_centric:
translation = np.asarray(self.engine.render.get_relative_point(self.cam, Point3(0, 0, 0)))
z_eye = 2 * n * f / ((f + n) - (2 * depth - 1) * (f - n))
points = self.simulate_lidar_from_depth(z_eye.squeeze(-1), intrinsics, translation, rotation_matrix)
return points

@staticmethod
def simulate_lidar_from_depth(depth_img, camera_intrinsics, camera_translation, camera_rotation):
"""
Simulate LiDAR points in the world coordinate system from a depth image.
Parameters:
depth_img (np.ndarray): Metric depth image of shape (H, W).
camera_intrinsics (np.ndarray): Camera intrinsic matrix of shape (3, 3).
camera_translation (np.ndarray): Translation vector of the camera in world coordinates of shape (3,).
camera_rotation (np.ndarray): Rotation matrix of the camera in world coordinates of shape (3, 3).
Returns:
np.ndarray: LiDAR points in the world coordinate system of shape (N, 3), where N is the number of valid points.
"""
# Transpose and flip the depth image so the pixel grid matches the intrinsics defined above
# Get image dimensions
depth_img = depth_img.T
depth_img = depth_img[::-1, ::-1]
height, width = depth_img.shape

# Create a grid of pixel coordinates (u, v)
u, v = np.meshgrid(np.arange(width), np.arange(height))
uv_coords = np.stack([u, v, np.ones_like(u)], axis=-1) # Shape: (H, W, 3)

# Reshape to (H*W, 3) for easier matrix multiplication
uv_coords = uv_coords.reshape(-1, 3)

# Invert the camera intrinsic matrix to project pixels to camera coordinates
K_inv = np.linalg.inv(camera_intrinsics)

# Compute 3D points in the camera coordinate system
cam_coords = (K_inv @ uv_coords.T).T # Shape: (H*W, 3)
cam_coords *= depth_img.reshape(-1)[..., None] # Scale by depth

# Remove invalid points (e.g., depth = 0)
# valid_mask = depth_img.flatten() > 0
# cam_coords = cam_coords[valid_mask]
cam_coords = cam_coords[..., [2, 1, 0]]

# Transform points to the world coordinate system
world_coords = (camera_rotation @ cam_coords.T).T + camera_translation

# to original shape
world_coords = world_coords.reshape(height, width, 3)
return world_coords.swapaxes(0, 1)
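The core of `simulate_lidar_from_depth` above is standard pinhole back-projection. A stripped-down sketch of the same idea (a hypothetical helper, ignoring the axis flips and transposes the commit applies to match Panda3D conventions):

```python
import numpy as np

def backproject_depth(depth, K, R, t):
    """Back-project an (H, W) metric depth map: X_world = R @ (K^-1 @ [u, v, 1] * d) + t."""
    h, w = depth.shape
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    pixels = np.stack([u, v, np.ones_like(u)], axis=-1).reshape(-1, 3)  # homogeneous pixel coords
    rays = (np.linalg.inv(K) @ pixels.T).T                              # rays in the camera frame
    cam_points = rays * depth.reshape(-1, 1)                            # scale each ray by its depth
    world_points = (R @ cam_points.T).T + t                             # rotate/translate into the world frame
    return world_points.reshape(h, w, 3)
```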
2 changes: 1 addition & 1 deletion metadrive/constants.py
@@ -553,4 +553,4 @@ class CameraTagStateKey:


DEFAULT_SENSOR_OFFSET = (0., 0.8, 1.5)
DEFAULT_SENSOR_HPR = (0., 0.0, 0.0)
DEFAULT_SENSOR_HPR = (0., -5, 0.0)
4 changes: 2 additions & 2 deletions metadrive/engine/core/draw.py
@@ -8,7 +8,7 @@
class ColorLineNodePath(LineNodePath):
def __init__(self, parent=None, thickness=1.0):
super(ColorLineNodePath, self).__init__(parent, name=None, thickness=thickness, colorVec=VBase4(1))
self.hide(CamMask.Shadow)
self.hide(CamMask.Shadow | CamMask.DepthCam)
self.clearShader()
self.setShaderAuto()

@@ -43,7 +43,7 @@ def __init__(self, parent=None, scale=1):
super(ColorSphereNodePath, self).__init__("Point Debugger")
scale /= 10
self.scale = scale
self.hide(CamMask.Shadow)
self.hide(CamMask.Shadow | CamMask.DepthCam)
self.reparentTo(self.engine.render if parent is None else parent)
self._existing_points = []
self._dying_points = []
2 changes: 2 additions & 0 deletions metadrive/engine/core/engine_core.py
@@ -592,6 +592,8 @@ def setup_sensors(self):
if sensor_id == "main_camera":
# It is added when initializing main_camera
continue
if sensor_id in self.sensors:
raise ValueError("Sensor id {} is duplicated!".format(sensor_id))
cls = sensor_cfg[0]
args = sensor_cfg[1:]
assert issubclass(cls, BaseSensor), "{} is not a subclass of BaseSensor".format(cls.__name__)
3 changes: 2 additions & 1 deletion metadrive/engine/core/image_buffer.py
@@ -69,7 +69,8 @@ def _create_camera(self, parent_node, bkg_color):
if parent_node:
self.cam.reparentTo(parent_node)
self.lens = self.cam.node().getLens()
self.lens.setFov(60)
self.lens.setFov(self.engine.global_config["camera_fov"])
self.lens.setAspectRatio(self.BUFFER_W / self.BUFFER_H)

def _create_buffer(self, width, height, frame_buffer_property):
"""
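For context, the lens FOV set here is what `PointCloudLidar.get_rgb_array_cpu` later converts into pixel focal lengths for the intrinsic matrix. A small sketch with made-up buffer and FOV numbers (illustrative only):

```python
import numpy as np

# Hypothetical values: a 200x64 buffer and the FOV pair returned by lens.getFov().
buffer_w, buffer_h = 200, 64
fov_h, fov_v = 60.0, 20.0  # horizontal/vertical FOV in degrees

f_x = buffer_w / 2 / np.tan(np.radians(fov_h) / 2)  # focal length in pixels (x)
f_y = buffer_h / 2 / np.tan(np.radians(fov_v) / 2)  # focal length in pixels (y)
print(f_x, f_y)
```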
7 changes: 3 additions & 4 deletions metadrive/envs/base_env.py
@@ -2,7 +2,7 @@
import time
from collections import defaultdict
from typing import Union, Dict, AnyStr, Optional, Tuple, Callable

from metadrive.constants import DEFAULT_SENSOR_HPR, DEFAULT_SENSOR_OFFSET
import gymnasium as gym
import numpy as np
from panda3d.core import PNMImage
@@ -565,7 +565,7 @@ def reset_sensors(self):
self.main_camera.set_bird_view_pos_hpr(current_track_agent.position)
for name, sensor in self.engine.sensors.items():
if hasattr(sensor, "track") and name != "main_camera":
sensor.track(current_track_agent.origin, [0., 0.8, 1.5], [0, 0.59681, 0])
sensor.track(current_track_agent.origin, DEFAULT_SENSOR_OFFSET, DEFAULT_SENSOR_HPR)
# Step the env to avoid the black screen in the first frame.
self.engine.taskMgr.step()

@@ -895,8 +895,7 @@ def switch_to_third_person_view(self):
self.main_camera.track(current_track_agent)
for name, sensor in self.engine.sensors.items():
if hasattr(sensor, "track") and name != "main_camera":
camera_video_posture = [0, 0.59681, 0]
sensor.track(current_track_agent.origin, constants.DEFAULT_SENSOR_OFFSET, camera_video_posture)
sensor.track(current_track_agent.origin, constants.DEFAULT_SENSOR_OFFSET, DEFAULT_SENSOR_HPR)
return

def next_seed_reset(self):
50 changes: 50 additions & 0 deletions metadrive/examples/point_cloud_lidar.py
@@ -0,0 +1,50 @@
#!/usr/bin/env python
"""
This script demonstrates how to use the environment where traffic and road map are loaded from a real-world dataset (nuScenes in this example).
"""
import numpy as np
from metadrive.component.sensors.point_cloud_lidar import PointCloudLidar
from metadrive.constants import HELP_MESSAGE
from metadrive.engine.asset_loader import AssetLoader
from panda3d.core import Point3
from metadrive.envs.scenario_env import ScenarioEnv

RENDER_MESSAGE = {
"Quit": "ESC",
"Switch perspective": "Q or B",
"Reset Episode": "R",
"Keyboard Control": "W,A,S,D",
}

if __name__ == "__main__":
asset_path = AssetLoader.asset_path
print(HELP_MESSAGE)
try:
env = ScenarioEnv(
{
"manual_control": True,
"sequential_seed": True,
"reactive_traffic": False,
"use_render": True,
"image_observation": True,
"vehicle_config": dict(image_source="point_cloud"),
"sensors": dict(point_cloud=(PointCloudLidar, 200, 64, True)), # 64 channel lidar
"data_directory": AssetLoader.file_path(asset_path, "nuscenes", unix_style=False),
"num_scenarios": 10
}
)
o, _ = env.reset()

cam = env.engine.get_sensor("point_cloud").cam
drawer = env.engine.make_line_drawer()
for i in range(1, 100000):
o, r, tm, tc, info = env.step([1.0, 0.])
points = o["image"][..., :, -1] + np.asarray(env.engine.render.get_relative_point(cam, Point3(0, 0, 0)))

drawer.reset()
drawer.draw_lines(points)
env.render(text=RENDER_MESSAGE, )
if tm or tc:
env.reset()
finally:
env.close()
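One note on the conversion inside the loop above: as implemented in `get_rgb_array_cpu`, the rotation into the world orientation is applied even when `ego_centric=True`; only the translation is skipped. Adding the camera's world position is therefore enough to obtain world coordinates. A minimal sketch of that step (variable names follow the example above):

```python
# Sketch of the world-coordinate conversion used in the example loop.
import numpy as np
from panda3d.core import Point3

cam = env.engine.get_sensor("point_cloud").cam
cam_pos = np.asarray(env.engine.render.get_relative_point(cam, Point3(0, 0, 0)))  # camera position in world frame
points_world = o["image"][..., -1] + cam_pos  # most recent frame of the (H, W, 3, stack) observation
```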
3 changes: 3 additions & 0 deletions metadrive/obs/image_obs.py
@@ -1,5 +1,6 @@
import gymnasium as gym
from metadrive.component.sensors.base_camera import BaseCamera
from metadrive.component.sensors.point_cloud_lidar import PointCloudLidar
import numpy as np

from metadrive.component.vehicle.base_vehicle import BaseVehicle
@@ -69,6 +70,8 @@ def observation_space(self):
channel = sensor_cls.num_channels if sensor_cls != "MainCamera" else 3
shape = (self.config["sensors"][self.image_source][2],
self.config["sensors"][self.image_source][1]) + (channel, self.STACK_SIZE)
if sensor_cls is PointCloudLidar:
return gym.spaces.Box(-np.inf, np.inf, shape=shape, dtype=np.float32)
if self.norm_pixel:
return gym.spaces.Box(-0.0, 1.0, shape=shape, dtype=np.float32)
else:
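The added branch gives the point cloud an unbounded observation space, since its channels are metric (x, y, z) coordinates rather than pixel intensities. Roughly what it evaluates to for a `(PointCloudLidar, 200, 64, True)` sensor with a stack size of 3 (illustrative numbers, not part of the diff):

```python
import numpy as np
import gymnasium as gym

# shape = (height, width, num_channels, STACK_SIZE) for the point cloud sensor
shape = (64, 200, 3, 3)
space = gym.spaces.Box(-np.inf, np.inf, shape=shape, dtype=np.float32)
print(space)
```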
64 changes: 64 additions & 0 deletions metadrive/tests/test_sensors/test_point_cloud.py
@@ -0,0 +1,64 @@
import pytest

from metadrive.component.sensors.point_cloud_lidar import PointCloudLidar
from metadrive.envs.metadrive_env import MetaDriveEnv

blackbox_test_configs = dict(
# standard=dict(stack_size=3, width=256, height=128, norm_pixel=True),
small=dict(stack_size=1, width=64, height=32, norm_pixel=False),
)


@pytest.mark.parametrize("config", list(blackbox_test_configs.values()), ids=list(blackbox_test_configs.keys()))
def test_point_cloud(config, render=False):
"""
Note: GitHub CI cannot run compute shaders, so this test may be temporarily disabled there.
Test the output shape of the point cloud LiDAR. This cannot verify the correctness of the rendered image; it only
checks the shape of the image output and the image-retrieval pipeline.
Args:
config: test parameter
render: render with cv2
Returns: None
"""
env = MetaDriveEnv(
{
"num_scenarios": 1,
"traffic_density": 0.1,
"show_terrain": False,
"map": "S",
"start_seed": 4,
"stack_size": config["stack_size"],
"vehicle_config": dict(image_source="camera"),
"sensors": {
"camera": (PointCloudLidar, config["width"], config["height"], True)
},
"interface_panel": ["dashboard", "camera"],
"image_observation": True, # it is a switch telling metadrive to use rgb as observation
"norm_pixel": config["norm_pixel"], # clip rgb to range(0,1) instead of (0, 255)
}
)
try:
env.reset()
import cv2
import time
start = time.time()
for i in range(1, 10):
o, r, tm, tc, info = env.step([0, 1])
assert env.observation_space.contains(o)
# Reverse
assert o["image"].shape == (
config["height"], config["width"], PointCloudLidar.num_channels, config["stack_size"]
)
if render:
cv2.imshow('img', o["image"][..., -1])
cv2.waitKey(1)
print("FPS:", 10 / (time.time() - start))
finally:
env.close()


if __name__ == '__main__':
test_point_cloud(config=blackbox_test_configs["small"], render=True)
56 changes: 56 additions & 0 deletions metadrive/tests/vis_functionality/vis_point_cloud.py
@@ -0,0 +1,56 @@
from metadrive.component.sensors.point_cloud_lidar import PointCloudLidar
import numpy as np
from panda3d.core import Point3
from metadrive.envs.safe_metadrive_env import SafeMetaDriveEnv

if __name__ == "__main__":
env = SafeMetaDriveEnv(
{
"num_scenarios": 1,
"traffic_density": 0.,
"accident_prob": 1.,
"start_seed": 4,
"map": "SSSSS",
"manual_control": True,
# "use_render": True,
"image_observation": True,
# "norm_pixel": True,
"use_render": True,
"debug": False,
"interface_panel": ["point_cloud"],
"sensors": dict(point_cloud=(PointCloudLidar, 200, 64, True)), # 64 channel lidar
"vehicle_config": dict(image_source="point_cloud"),
# "map_config": {
# BaseMap.GENERATE_TYPE: MapGenerateMethod.BIG_BLOCK_NUM,
# BaseMap.GENERATE_CONFIG: 12,
# BaseMap.LANE_WIDTH: 3.5,
# BaseMap.LANE_NUM: 3,
# }
}
)
env.reset()
drawer = env.engine.make_line_drawer()
cam = env.engine.get_sensor("point_cloud").cam

for i in range(1, 100000):
o, r, tm, tc, info = env.step([0, 1])
assert env.observation_space.contains(o)

# to world coordinate
points = o["image"][..., :, -1] + np.asarray(env.engine.render.get_relative_point(cam, Point3(0, 0, 0)))

drawer.reset()
drawer.draw_lines(points)
# drawer.draw_points(points, colors=[(0, 0, 1, 1)] * len(points))
#
# np.zeros([60, 3])
#
# env.vehicle.convert_to_world_coordinates()
if env.config["use_render"]:
# for i in range(ImageObservation.STACK_SIZE):
# ObservationType.show_gray_scale_array(o["image"][:, :, i])
env.render()
# if tm or tc:
# # print("Reset")
# env.reset()
env.close()
