From 196fb5f1cbac5ea141d15171b0de5a85699a3b0f Mon Sep 17 00:00:00 2001
From: rhett-chen <2206571319@qq.com>
Date: Tue, 7 Dec 2021 21:20:28 +0800
Subject: [PATCH] release

---
 README.md                     |  82 ++++++++++++++++++++
 ap_in_one_image.py            | 136 ----------------------------------
 command_test.sh               |   1 +
 command_train.sh              |   2 +-
 compute_ap.py                 |  66 -----------------
 dataset/generate_graspness.py |  13 +++-
 dataset/graspnet_dataset.py   |  27 +------
 dataset/simplify_dataset.py   |  10 ++-
 demo.py                       | 124 -------------------------------
 models/backbone.py            | 131 --------------------------------
 models/graspnet.py            |  34 +--------
 models/loss.py                |  45 ++++-------
 models/modules.py             |  73 ------------------
 models/resnet.py              |   5 --
 requirements.txt              |   1 +
 test.py                       |  34 ++++-----
 train.py                      |  14 ++--
 utils/label_generation.py     |  13 ----
 vis_and_analysis.py           |  44 -----------
 19 files changed, 143 insertions(+), 712 deletions(-)
 create mode 100644 README.md
 delete mode 100644 ap_in_one_image.py
 create mode 100644 command_test.sh
 delete mode 100644 compute_ap.py
 delete mode 100644 demo.py
 delete mode 100644 models/backbone.py
 delete mode 100644 vis_and_analysis.py

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a7d9289
--- /dev/null
+++ b/README.md
@@ -0,0 +1,82 @@
+# GraspNet graspness
+My implementation of the paper "Graspness Discovery in Clutters for Fast and Accurate Grasp Detection" (ICCV 2021).
+
+[[paper](https://openaccess.thecvf.com/content/ICCV2021/papers/Wang_Graspness_Discovery_in_Clutters_for_Fast_and_Accurate_Grasp_Detection_ICCV_2021_paper.pdf)]
+[[dataset](https://graspnet.net/)]
+[[API](https://github.com/graspnet/graspnetAPI)]
+
+
+## Requirements
+- Python 3
+- PyTorch 1.6
+- Open3D 0.8
+- TensorBoard 2.3
+- NumPy
+- SciPy
+- Pillow
+- tqdm
+- MinkowskiEngine 0.5.4
+
+## Installation
+Get the code.
+```bash
+git clone https://github.com/rhett-chen/graspness_implementation.git
+cd graspness_implementation
+```
+Install the required packages via pip.
+```bash
+pip install -r requirements.txt
+```
+Compile and install the pointnet2 operators (code adapted from [votenet](https://github.com/facebookresearch/votenet)).
+```bash
+cd pointnet2
+python setup.py install
+```
+Compile and install the knn operator (code adapted from [pytorch_knn_cuda](https://github.com/chrischoy/pytorch_knn_cuda)).
+```bash
+cd knn
+python setup.py install
+```
+Install graspnetAPI for evaluation.
+```bash
+git clone https://github.com/graspnet/graspnetAPI.git
+cd graspnetAPI
+pip install .
+```
+
+## Point-level Graspness Generation
+Point-level graspness labels are not included in the original dataset and need to be generated. Make sure you have downloaded the original dataset from [GraspNet](https://graspnet.net/). The generation code is in [dataset/generate_graspness.py](dataset/generate_graspness.py).
+```bash
+cd dataset
+python generate_graspness.py --dataset_root /data3/graspnet --camera_type kinect
+```
+
+## Simplify dataset
+The original grasp_label files contain a lot of redundant data, so simplifying them saves a significant amount of memory. The code is in [dataset/simplify_dataset.py](dataset/simplify_dataset.py).
+```bash
+cd dataset
+python simplify_dataset.py --dataset_root /data3/graspnet
+```
+
+## Training and Testing
+Training examples are shown in [command_train.sh](command_train.sh). `--dataset_root`, `--camera` and `--log_dir` should be specified according to your settings. You can use TensorBoard to visualize the training process.
+
+Testing examples are shown in [command_test.sh](command_test.sh), which covers both inference and result evaluation. `--dataset_root`, `--camera`, `--checkpoint_path` and `--dump_dir` should be specified according to your settings. Set `--collision_thresh` to -1 for fast inference (this skips collision detection).
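+For reference, the commands in these two scripts look like the following (the GPU id, dataset root and log paths are examples from my setup and should be adapted):
+```bash
+CUDA_VISIBLE_DEVICES=4 python train.py --camera kinect --log_dir logs/log_kn --batch_size 4 --learning_rate 0.001 --model_name minkuresnet --dataset_root /data3/graspnet
+CUDA_VISIBLE_DEVICES=4 python test.py --camera kinect --dump_dir logs/log_kn/dump_epoch03 --checkpoint_path logs/log_kn/epoch03.tar --batch_size 1 --dataset_root /data3/graspnet --infer --eval
+```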
+
+If you need the trained weights, you can contact me directly.
+
+## Results
+The "In repo" row reports the performance of my trained model, evaluated without collision detection.
+
+Evaluation results on the Kinect camera:
|          |        | Seen  |       |        | Similar |       |        | Novel |       |
+|:--------:|:------:|:-----:|:-----:|:------:|:-------:|:-----:|:------:|:-----:|:-----:|
+|          | __AP__ | AP0.8 | AP0.4 | __AP__ | AP0.8   | AP0.4 | __AP__ | AP0.8 | AP0.4 |
+| In paper | 61.19  | 71.46 | 56.04 | 47.39  | 56.78   | 40.43 | 19.01  | 23.73 | 10.60 |
+| In repo  | 61.83  | 73.28 | 54.14 | 51.13  | 62.53   | 41.57 | 19.94  | 24.90 | 11.02 |
+
+## Troubleshooting
+If you run into the `torch.floor` error in MinkowskiEngine, you can simply work around it by editing the MinkowskiEngine source: in `MinkowskiEngine/utils/quantization.py`, line 262, change `discrete_coordinates = _auto_floor(coordinates)` to `discrete_coordinates = coordinates`.
+
+## Acknowledgement
+My code is mainly based on [graspnet-baseline](https://github.com/graspnet/graspnet-baseline).
diff --git a/ap_in_one_image.py b/ap_in_one_image.py
deleted file mode 100644
index be9c6c2..0000000
--- a/ap_in_one_image.py
+++ /dev/null
@@ -1,136 +0,0 @@
-import numpy as np
-from graspnetAPI import GraspGroup
-import os
-import pickle
-import open3d as o3d
-from graspnetAPI.utils.config import get_config
-from graspnetAPI.utils.xmlhandler import xmlReader
-from graspnetAPI.utils.eval_utils import create_table_points, parse_posevector, transform_points, \
-    eval_grasp, load_dexnet_model, voxel_sample_points
-import sys
-
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-ROOT_DIR = os.path.dirname(BASE_DIR)
-sys.path.append(ROOT_DIR)
-
-
-def get_config_rgbmatter():
-    '''
-
-    return the config dict
-    '''
-    config = dict()
-    config['scene_id'] = 100
-    config['index_str'] = '0000'
-    config['camera'] = 'kinect'
-    config['collision_detection_choice'] = 'point_cloud'  # point_cloud or full_model
-
-    # config['dataset_path'] = "/data3/graspnet/"
-    config['dataset_path'] = '/media/bot/980A6F5E0A6F38801/datasets/graspnet'
-
-    # config['res_6D_pose_path'] = '/home/zibo/cvpr2022/graspnet-cvpr/logs/' \
-    #                              'log_kn_graspness_minkowski_v0/dump_epoch04_fps'
-    config['res_6D_pose_path'] = '/data/zibo/logs/log_kn_v1/dump_epoch06_fps/'
-    config['scene_id_str'] = 'scene_' + str(config['scene_id']).zfill(4)
-    return config
-
-
-my_config = get_config_rgbmatter()
-
-res_6d_path = os.path.join(my_config['res_6D_pose_path'], my_config['scene_id_str'], my_config['camera'],
-                           my_config['index_str'] + '.npy')
-# res_6d_path = os.path.join(my_config['grasp_gt_path'], my_config['scene_id_str'], my_config['camera'],
-#                            my_config['index_str'] + '.npy')
-# res_6d_path = my_config['res_6D_pose_path']
-grasp = np.load(res_6d_path)
-print('\ntesting {}, scene_{} image_{}'.format(res_6d_path, my_config['scene_id'], my_config['index_str']))
-
-grasp_group = GraspGroup(grasp)
-print('grasp shape: ', grasp_group.grasp_group_array.shape)
-camera_pose = np.load(os.path.join(my_config['dataset_path'], 'scenes', my_config['scene_id_str'], my_config['camera'],
-                                   'camera_poses.npy'))[int(my_config['index_str'])]
-align_mat = np.load(os.path.join(my_config['dataset_path'], 'scenes', my_config['scene_id_str'],
-                                 my_config['camera'], 'cam0_wrt_table.npy'))
-scene_reader = xmlReader(os.path.join(my_config['dataset_path'], 'scenes',
my_config['scene_id_str'], - my_config['camera'], 'annotations', my_config['index_str'] + '.xml')) - -config = get_config() -TOP_K = 50 -list_coe_of_friction = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2] -max_width = 0.1 -model_dir = os.path.join(my_config['dataset_path'], 'models') - -posevectors = scene_reader.getposevectorlist() -obj_list = [] -pose_list = [] -for posevector in posevectors: - obj_idx, mat = parse_posevector(posevector) - obj_list.append(obj_idx) - pose_list.append(mat) -table = create_table_points(1.0, 1.0, 0.05, dx=-0.5, dy=-0.5, dz=-0.05, grid_size=0.008) -table_trans = transform_points(table, np.linalg.inv(np.matmul(align_mat, camera_pose))) - -gg_array = grasp_group.grasp_group_array -min_width_mask = (gg_array[:, 1] < 0) -max_width_mask = (gg_array[:, 1] > max_width) -gg_array[min_width_mask, 1] = 0 -gg_array[max_width_mask, 1] = max_width -grasp_group.grasp_group_array = gg_array - -obj_list = [] -model_list = [] -dexmodel_list = [] -for posevector in posevectors: - obj_idx, _ = parse_posevector(posevector) - obj_list.append(obj_idx) -for obj_idx in obj_list: - model = o3d.io.read_point_cloud(os.path.join(model_dir, '%03d' % obj_idx, 'nontextured.ply')) - dex_cache_path = os.path.join(my_config['dataset_path'], "dex_models", '%03d.pkl' % obj_idx) - if os.path.exists(dex_cache_path): # don't know why in 203, pickle.load() will throw an error, so add not - with open(dex_cache_path, 'rb') as f: - dexmodel = pickle.load(f) - else: - dexmodel = load_dexnet_model(os.path.join(model_dir, '%03d' % obj_idx, 'textured')) - points = np.array(model.points) - model_list.append(points) - dexmodel_list.append(dexmodel) - -model_sampled_list = list() -for model in model_list: - model_sampled = voxel_sample_points(model, 0.008) - model_sampled_list.append(model_sampled) - -grasp_list, score_list, collision_mask_list = eval_grasp(grasp_group, model_sampled_list, dexmodel_list, pose_list, - config, table=table_trans, voxel_size=0.008, TOP_K=TOP_K) -# remove empty -grasp_list = [x for x in grasp_list if len(x) != 0] -score_list = [x for x in score_list if len(x) != 0] -collision_mask_list = [x for x in collision_mask_list if len(x) != 0] - -if len(grasp_list) == 0: - grasp_accuracy = np.zeros((TOP_K, len(list_coe_of_friction))) - print('\rMean Accuracy {}'.format(np.mean(grasp_accuracy[:, :]))) - -# concat into scene level -grasp_list, score_list, collision_mask_list = np.concatenate(grasp_list), np.concatenate( - score_list), np.concatenate(collision_mask_list) -# sort in scene level -print('final grasp shape that used to compute ap: ', grasp_list.shape) -grasp_confidence = grasp_list[:, 0] -indices = np.argsort(-grasp_confidence) -grasp_list, score_list, collision_mask_list = grasp_list[indices], score_list[indices], collision_mask_list[ - indices] - -# calculate AP -grasp_accuracy = np.zeros((TOP_K, len(list_coe_of_friction))) -print(score_list) -# score_list = np.array([i for i in score_list if abs(i + 3) > 1.001]) -for fric_idx, fric in enumerate(list_coe_of_friction): - for k in range(0, TOP_K): - if k + 1 > len(score_list): - grasp_accuracy[k, fric_idx] = np.sum(((score_list <= fric) & (score_list > 0)).astype(int)) / ( - k + 1) - else: - grasp_accuracy[k, fric_idx] = np.sum( - ((score_list[0:k + 1] <= fric) & (score_list[0:k + 1] > 0)).astype(int)) / (k + 1) -# print(grasp_accuracy) -print('\nMean Accuracy: %.3f' % (100.0 * np.mean(grasp_accuracy[:, :]))) diff --git a/command_test.sh b/command_test.sh new file mode 100644 index 0000000..1126d7a --- /dev/null +++ b/command_test.sh @@ -0,0 
+1 @@ +CUDA_VISIBLE_DEVICES=4 python test.py --camera kinect --dump_dir logs/log_kn/dump_epoch03 --checkpoint_path logs/log_kn/epoch03.tar --batch_size 1 --dataset_root /data3/graspnet --infer --eval \ No newline at end of file diff --git a/command_train.sh b/command_train.sh index d139e64..6b5c623 100644 --- a/command_train.sh +++ b/command_train.sh @@ -1 +1 @@ -CUDA_VISIBLE_DEVICES=4 python train.py --log_dir logs/log_kn_v9 --batch_size 4 --learning_rate 0.001 --model_name np15000_graspness1e-1_bs4_lr1e-3_viewres_dataaug_fps_14C --dataset_root /data3/graspnet \ No newline at end of file +CUDA_VISIBLE_DEVICES=4 python train.py --camera kinect --log_dir logs/log_kn --batch_size 4 --learning_rate 0.001 --model_name minkuresnet --dataset_root /data3/graspnet \ No newline at end of file diff --git a/compute_ap.py b/compute_ap.py deleted file mode 100644 index 8fed7f3..0000000 --- a/compute_ap.py +++ /dev/null @@ -1,66 +0,0 @@ -import numpy as np -import os - -acc_all = [] -# ap_scenes_path = '/data/zibo/cvpr22/logs/log_kn_graspnessrevised_pointnet2_v15_cs/' \ -# 'dump_epoch06_fps/ap_scenes' -ap_scenes_path = '/home/zibo/graspness/logs/log_kn_v9/dump_epoch06/ap_scenes' -print('For ', ap_scenes_path) -for index in range(100, 130): - acc_scene = [] - for i in range(0, 256): - path = os.path.join(ap_scenes_path, 'scene_' + str(index).zfill(4), str(i).zfill(4) + '.npy') - acc_c = np.load(path) - acc_scene.append(acc_c) - acc_all.append(acc_scene) - -acc_all = np.array(acc_all) * 100. -# 90 scenes * 256 images * 50 top_k * 6 len(list_coe_of_friction = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2]) -print('acc shape: ', acc_all.shape) -ap_all = np.mean(acc_all) -# ap_seen_all = np.mean(acc_all[0:30]) - -# ap_unseen_all = np.mean(acc_all[30:60]) -# ap_novel_all = np.mean(acc_all[60:90]) -print('AP: ', ap_all) -# print('AP Seen: ', ap_seen_all) -# print('AP Unseen: ', ap_unseen_all) -# print('AP Novel: ', ap_novel_all) - -ap_all_2 = np.mean(acc_all[:, :, :, 0]) -ap_all_4 = np.mean(acc_all[:, :, :, 1]) -ap_all_8 = np.mean(acc_all[:, :, :, 3]) -ap_all_10 = np.mean(acc_all[:, :, :, 4]) -ap_all_12 = np.mean(acc_all[:, :, :, 5]) - -# -# ap_seen_2 = np.mean(acc_all[0:30, :, :, 0]) -# ap_seen_4 = np.mean(acc_all[0:30, :, :, 1]) -# ap_seen_8 = np.mean(acc_all[0:30, :, :, 3]) -# -# ap_unseen_2 = np.mean(acc_all[30:60, :, :, 0]) -# ap_unseen_4 = np.mean(acc_all[30:60, :, :, 1]) -# ap_unseen_8 = np.mean(acc_all[30:60, :, :, 3]) -# -# -# ap_novel_2 = np.mean(acc_all[60:90, :, :, 0]) -# ap_novel_4 = np.mean(acc_all[60:90, :, :, 1]) -# ap_novel_8 = np.mean(acc_all[60:90, :, :, 3 ]) -# -print('\nAP all 0.2: ', ap_all_2) -print('AP all 0.4: ', ap_all_4) -print('AP all 0.8: ', ap_all_8) -print('AP all 1.0: ', ap_all_10) -print('AP all 1.2: ', ap_all_12) -# -# print('\nAP Seen 0.2: ', ap_seen_2) -# print('AP Seen 0.4: ', ap_seen_4) -# print('AP Seen 0.8: ', ap_seen_8) -# -# print('\nAP Unseen 0.2: ', ap_unseen_2) -# print('AP Unseen 0.4: ', ap_unseen_4) -# print('AP Unseen 0.8: ', ap_unseen_8) -# -# print('\nAP Novel 0.2: ', ap_novel_2) -# print('AP Novel 0.4: ', ap_novel_4) -# print('AP Novel 0.8: ', ap_novel_8) diff --git a/dataset/generate_graspness.py b/dataset/generate_graspness.py index a546bb3..f75b480 100644 --- a/dataset/generate_graspness.py +++ b/dataset/generate_graspness.py @@ -10,17 +10,24 @@ import torch from graspnetAPI.utils.xmlhandler import xmlReader from graspnetAPI.utils.utils import get_obj_pose_list, transform_points +import argparse + + +parser = argparse.ArgumentParser() +parser.add_argument('--dataset_root', 
default=None, required=True) +parser.add_argument('--camera_type', default='kinect', help='Camera split [realsense/kinect]') if __name__ == '__main__': - dataset_root = '/media/bot/980A6F5E0A6F38801/datasets/graspnet/' - camera_type = 'realsense' + cfgs = parser.parse_args() + dataset_root = cfgs.dataset_root # set dataset root + camera_type = cfgs.camera_type # kinect / realsense save_path_root = os.path.join(dataset_root, 'graspness') num_views, num_angles, num_depths = 300, 12, 4 fric_coef_thresh = 0.6 point_grasp_num = num_views * num_angles * num_depths - for scene_id in range(101): + for scene_id in range(100): for ann_id in range(256): # get scene point cloud print('generating scene: {} ann: {}'.format(scene_id, ann_id)) diff --git a/dataset/graspnet_dataset.py b/dataset/graspnet_dataset.py index 0fb9c45..67f4bf6 100644 --- a/dataset/graspnet_dataset.py +++ b/dataset/graspnet_dataset.py @@ -18,14 +18,13 @@ class GraspNetDataset(Dataset): def __init__(self, root, grasp_labels=None, camera='kinect', split='train', num_points=20000, - voxel_size=0.005, remove_outlier=True, remove_invisible=False, augment=False, load_label=True): + voxel_size=0.005, remove_outlier=True, augment=False, load_label=True): assert (num_points <= 50000) self.root = root self.split = split self.voxel_size = voxel_size self.num_points = num_points self.remove_outlier = remove_outlier - self.remove_invisible = remove_invisible self.grasp_labels = grasp_labels self.camera = camera self.augment = augment @@ -42,8 +41,6 @@ def __init__(self, root, grasp_labels=None, camera='kinect', split='train', num_ self.sceneIds = list(range(130, 160)) elif split == 'test_novel': self.sceneIds = list(range(160, 190)) - elif split == 'mini_test': - self.sceneIds = list(range(100, 101)) self.sceneIds = ['scene_{}'.format(str(x).zfill(4)) for x in self.sceneIds] self.depthpath = [] @@ -100,8 +97,7 @@ def __getitem__(self, index): else: return self.get_data(index) - def get_data(self, index): - # color = np.array(Image.open(self.colorpath[index]), dtype=np.float32) / 255.0 + def get_data(self, index, return_raw_cloud=False): depth = np.array(Image.open(self.depthpath[index])) seg = np.array(Image.open(self.labelpath[index])) meta = scio.loadmat(self.metapath[index]) @@ -120,7 +116,6 @@ def get_data(self, index): # get valid points depth_mask = (depth > 0) - # seg_mask = (seg > 0) if self.remove_outlier: camera_poses = np.load(os.path.join(self.root, 'scenes', scene, self.camera, 'camera_poses.npy')) align_mat = np.load(os.path.join(self.root, 'scenes', scene, self.camera, 'cam0_wrt_table.npy')) @@ -130,8 +125,9 @@ def get_data(self, index): else: mask = depth_mask cloud_masked = cloud[mask] - # seg_masked = seg[mask] + if return_raw_cloud: + return cloud_masked # sample points random if len(cloud_masked) >= self.num_points: idxs = np.random.choice(len(cloud_masked), self.num_points, replace=False) @@ -139,9 +135,6 @@ def get_data(self, index): idxs1 = np.arange(len(cloud_masked)) idxs2 = np.random.choice(len(cloud_masked), self.num_points - len(cloud_masked), replace=True) idxs = np.concatenate([idxs1, idxs2], axis=0) - - # idxs_id = int(self.depthpath[index].split('scene_')[-1][:4]) * 256 + int(self.depthpath[index][-8:-4]) - # idxs = np.load('/data/cloud_inds_{}/{}.npy'.format(self.camera, idxs_id))[0] # for bot cloud_sampled = cloud_masked[idxs] ret_dict = {'point_clouds': cloud_sampled.astype(np.float32), @@ -151,7 +144,6 @@ def get_data(self, index): return ret_dict def get_data_label(self, index): - # color = 
np.array(Image.open(self.colorpath[index]), dtype=np.float32) / 255.0 depth = np.array(Image.open(self.depthpath[index])) seg = np.array(Image.open(self.labelpath[index])) meta = scio.loadmat(self.metapath[index]) @@ -182,7 +174,6 @@ def get_data_label(self, index): else: mask = depth_mask cloud_masked = cloud[mask] - # color_masked = color[mask] seg_masked = seg[mask] # sample points @@ -193,7 +184,6 @@ def get_data_label(self, index): idxs2 = np.random.choice(len(cloud_masked), self.num_points - len(cloud_masked), replace=True) idxs = np.concatenate([idxs1, idxs2], axis=0) cloud_sampled = cloud_masked[idxs] - # color_sampled = color_masked[idxs] seg_sampled = seg_masked[idxs] graspness_sampled = graspness[idxs] objectness_label = seg_sampled.copy() @@ -211,15 +201,6 @@ def get_data_label(self, index): points, widths, scores = self.grasp_labels[obj_idx] collision = self.collision_labels[scene][i] # (Np, V, A, D) - # remove invisible grasp points - if self.remove_invisible: - visible_mask = remove_invisible_grasp_points(cloud_sampled[seg_sampled == obj_idx], points, - poses[:, :, i], th=0.01) - points = points[visible_mask] - widths = widths[visible_mask] - scores = scores[visible_mask] - collision = collision[visible_mask] - idxs = np.random.choice(len(points), min(max(int(len(points) / 4), 300), len(points)), replace=False) grasp_points_list.append(points[idxs]) grasp_widths_list.append(widths[idxs]) diff --git a/dataset/simplify_dataset.py b/dataset/simplify_dataset.py index 2281acb..0a7efd5 100644 --- a/dataset/simplify_dataset.py +++ b/dataset/simplify_dataset.py @@ -1,7 +1,10 @@ -from tqdm import tqdm import numpy as np import os -import scipy.io as scio +import argparse + + +parser = argparse.ArgumentParser() +parser.add_argument('--dataset_root', default=None, required=True) def simplify_grasp_labels(root, save_path): @@ -33,7 +36,8 @@ def simplify_grasp_labels(root, save_path): if __name__ == '__main__': - root = '/media/bot/980A6F5E0A6F38801/datasets/graspnet/' + cfgs = parser.parse_args() + root = cfgs.dataset_root # set root and save path save_path = os.path.join(root, 'grasp_label_simplified') simplify_grasp_labels(root, save_path) diff --git a/demo.py b/demo.py deleted file mode 100644 index f257bcb..0000000 --- a/demo.py +++ /dev/null @@ -1,124 +0,0 @@ -""" Demo to show prediction results. 
- Author: chenxi-wang -""" - -import os -import sys -import numpy as np -import open3d as o3d -import argparse -import importlib -import scipy.io as scio -from PIL import Image - -import torch -from graspnetAPI import GraspGroup - -ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(os.path.join(ROOT_DIR, 'models')) -sys.path.append(os.path.join(ROOT_DIR, 'dataset')) -sys.path.append(os.path.join(ROOT_DIR, 'utils')) - -from graspnet import GraspNet, pred_decode -from graspnet_dataset import GraspNetDataset -from collision_detector import ModelFreeCollisionDetector -from data_utils import CameraInfo, create_point_cloud_from_depth_image - -parser = argparse.ArgumentParser() -parser.add_argument('--checkpoint_path', required=True, help='Model checkpoint path') -parser.add_argument('--num_point', type=int, default=20000, help='Point Number [default: 20000]') -parser.add_argument('--num_view', type=int, default=300, help='View Number [default: 300]') -parser.add_argument('--collision_thresh', type=float, default=0.01, help='Collision Threshold in collision detection [default: 0.01]') -parser.add_argument('--voxel_size', type=float, default=0.01, help='Voxel Size to process point clouds before collision detection [default: 0.01]') -cfgs = parser.parse_args() - - -def get_net(): - # Init the model - net = GraspNet(input_feature_dim=0, num_view=cfgs.num_view, num_angle=12, num_depth=4, - cylinder_radius=0.05, hmin=-0.02, hmax_list=[0.01,0.02,0.03,0.04], is_training=False) - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - net.to(device) - # Load checkpoint - checkpoint = torch.load(cfgs.checkpoint_path) - net.load_state_dict(checkpoint['model_state_dict']) - start_epoch = checkpoint['epoch'] - print("-> loaded checkpoint %s (epoch: %d)"%(cfgs.checkpoint_path, start_epoch)) - # set model to eval mode - net.eval() - return net - -def get_and_process_data(data_dir): - # load data - color = np.array(Image.open(os.path.join(data_dir, 'color.png')), dtype=np.float32) / 255.0 - depth = np.array(Image.open(os.path.join(data_dir, 'depth.png'))) - workspace_mask = np.array(Image.open(os.path.join(data_dir, 'workspace_mask.png'))) - meta = scio.loadmat(os.path.join(data_dir, 'meta.mat')) - intrinsic = meta['intrinsic_matrix'] - factor_depth = meta['factor_depth'] - - # generate cloud - camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2], factor_depth) - cloud = create_point_cloud_from_depth_image(depth, camera, organized=True) - - # get valid points - mask = (workspace_mask & (depth > 0)) - cloud_masked = cloud[mask] - color_masked = color[mask] - - # sample points - if len(cloud_masked) >= cfgs.num_point: - idxs = np.random.choice(len(cloud_masked), cfgs.num_point, replace=False) - else: - idxs1 = np.arange(len(cloud_masked)) - idxs2 = np.random.choice(len(cloud_masked), cfgs.num_point-len(cloud_masked), replace=True) - idxs = np.concatenate([idxs1, idxs2], axis=0) - cloud_sampled = cloud_masked[idxs] - color_sampled = color_masked[idxs] - - # convert data - cloud = o3d.geometry.PointCloud() - cloud.points = o3d.utility.Vector3dVector(cloud_masked.astype(np.float32)) - cloud.colors = o3d.utility.Vector3dVector(color_masked.astype(np.float32)) - end_points = dict() - cloud_sampled = torch.from_numpy(cloud_sampled[np.newaxis].astype(np.float32)) - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - cloud_sampled = cloud_sampled.to(device) - end_points['point_clouds'] = cloud_sampled - 
end_points['cloud_colors'] = color_sampled - - return end_points, cloud - -def get_grasps(net, end_points): - # Forward pass - with torch.no_grad(): - end_points = net(end_points) - grasp_preds = pred_decode(end_points) - gg_array = grasp_preds[0].detach().cpu().numpy() - gg = GraspGroup(gg_array) - return gg - -def collision_detection(gg, cloud): - mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs.voxel_size) - collision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=cfgs.collision_thresh) - gg = gg[~collision_mask] - return gg - -def vis_grasps(gg, cloud): - gg.nms() - gg.sort_by_score() - gg = gg[:50] - grippers = gg.to_open3d_geometry_list() - o3d.visualization.draw_geometries([cloud, *grippers]) - -def demo(data_dir): - net = get_net() - end_points, cloud = get_and_process_data(data_dir) - gg = get_grasps(net, end_points) - if cfgs.collision_thresh > 0: - gg = collision_detection(gg, np.array(cloud.points)) - vis_grasps(gg, cloud) - -if __name__=='__main__': - data_dir = 'doc/example_data' - demo(data_dir) diff --git a/models/backbone.py b/models/backbone.py deleted file mode 100644 index 859190a..0000000 --- a/models/backbone.py +++ /dev/null @@ -1,131 +0,0 @@ -""" PointNet2 backbone for feature learning. - Author: Charles R. Qi -""" -import os -import sys -import torch -import torch.nn as nn - -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -ROOT_DIR = os.path.dirname(BASE_DIR) -sys.path.append(ROOT_DIR) - -from pointnet2.pointnet2_modules import PointnetSAModuleVotes, PointnetFPModule - - -class Pointnet2Backbone(nn.Module): - r""" - Backbone network for point cloud feature learning. - Based on Pointnet++ single-scale grouping network. - - Parameters - ---------- - input_feature_dim: int - Number of input channels in the feature descriptor for each point. - e.g. 3 for RGB. - """ - def __init__(self, input_feature_dim=0): - super().__init__() - - self.sa1 = PointnetSAModuleVotes( - npoint=2048, - radius=0.04, - nsample=64, - mlp=[input_feature_dim, 64, 64, 128], - use_xyz=True, - normalize_xyz=True - ) - - self.sa2 = PointnetSAModuleVotes( - npoint=1024, - radius=0.1, - nsample=32, - mlp=[128, 128, 128, 256], - use_xyz=True, - normalize_xyz=True - ) - - self.sa3 = PointnetSAModuleVotes( - npoint=512, - radius=0.2, - nsample=16, - mlp=[256, 128, 128, 256], - use_xyz=True, - normalize_xyz=True - ) - - self.sa4 = PointnetSAModuleVotes( - npoint=256, - radius=0.3, - nsample=16, - mlp=[256, 128, 128, 256], - use_xyz=True, - normalize_xyz=True - ) - - self.fp1 = PointnetFPModule(mlp=[256+256,256,256]) - self.fp2 = PointnetFPModule(mlp=[256+256,256,256]) - - def _break_up_pc(self, pc): - xyz = pc[..., 0:3].contiguous() - features = ( - pc[..., 3:].transpose(1, 2).contiguous() - if pc.size(-1) > 3 else None - ) - - return xyz, features - - def forward(self, pointcloud: torch.cuda.FloatTensor, end_points=None): - r""" - Forward pass of the network - - Parameters - ---------- - pointcloud: Variable(torch.cuda.FloatTensor) - (B, N, 3 + input_feature_dim) tensor - Point cloud to run predicts on - Each point in the point-cloud MUST - be formated as (x, y, z, features...) 
- - Returns - ---------- - end_points: {XXX_xyz, XXX_features, XXX_inds} - XXX_xyz: float32 Tensor of shape (B,K,3) - XXX_features: float32 Tensor of shape (B,D,K) - XXX_inds: int64 Tensor of shape (B,K) values in [0,N-1] - """ - if not end_points: end_points = {} - batch_size = pointcloud.shape[0] - - xyz, features = self._break_up_pc(pointcloud) - end_points['input_xyz'] = xyz - end_points['input_features'] = features - - # --------- 4 SET ABSTRACTION LAYERS --------- - xyz, features, fps_inds = self.sa1(xyz, features) - end_points['sa1_inds'] = fps_inds - end_points['sa1_xyz'] = xyz - end_points['sa1_features'] = features - - xyz, features, fps_inds = self.sa2(xyz, features) # this fps_inds is just 0,1,...,1023 - end_points['sa2_inds'] = fps_inds - end_points['sa2_xyz'] = xyz - end_points['sa2_features'] = features - - xyz, features, fps_inds = self.sa3(xyz, features) # this fps_inds is just 0,1,...,511 - end_points['sa3_xyz'] = xyz - end_points['sa3_features'] = features - - xyz, features, fps_inds = self.sa4(xyz, features) # this fps_inds is just 0,1,...,255 - end_points['sa4_xyz'] = xyz - end_points['sa4_features'] = features - - # --------- 2 FEATURE UPSAMPLING LAYERS -------- - features = self.fp1(end_points['sa3_xyz'], end_points['sa4_xyz'], end_points['sa3_features'], end_points['sa4_features']) - features = self.fp2(end_points['sa2_xyz'], end_points['sa3_xyz'], end_points['sa2_features'], features) - end_points['fp2_features'] = features - end_points['fp2_xyz'] = end_points['sa2_xyz'] - num_seed = end_points['fp2_xyz'].shape[1] - end_points['fp2_inds'] = end_points['sa1_inds'][:,0:num_seed] # indices among the entire input point clouds - - return features, end_points['fp2_xyz'], end_points \ No newline at end of file diff --git a/models/graspnet.py b/models/graspnet.py index 20961bc..a3a20a5 100644 --- a/models/graspnet.py +++ b/models/graspnet.py @@ -13,7 +13,7 @@ ROOT_DIR = os.path.dirname(BASE_DIR) sys.path.append(ROOT_DIR) -from models.backbone_resunet14 import MinkUNet14, MinkUNet14D, MinkUNet14C +from models.backbone_resunet14 import MinkUNet14D from models.modules import ApproachNet, GraspableNet, CloudCrop, SWADNet from loss_utils import GRASP_MAX_WIDTH, NUM_VIEW, NUM_ANGLE, NUM_DEPTH, GRASPNESS_THRESHOLD, M_POINT from label_generation import process_grasp_labels, match_grasp_view_and_label, batch_viewpoint_params_to_matrix @@ -24,7 +24,6 @@ class GraspNet(nn.Module): def __init__(self, cylinder_radius=0.05, seed_feat_dim=512, is_training=True): super().__init__() self.is_training = is_training - # self.log_string = log_string self.seed_feature_dim = seed_feat_dim self.num_depth = NUM_DEPTH self.num_angle = NUM_ANGLE @@ -58,26 +57,10 @@ def forward(self, end_points): seed_features_graspable = [] seed_xyz_graspable = [] - # seed_inds_graspable = [] graspable_num_batch = 0. for i in range(B): cur_mask = graspable_mask[i] - # inds = torch.arange(point_num).to(objectness_score.device) - # graspable_num = cur_mask.sum() - # graspable_num_batch += graspable_num graspable_num_batch += cur_mask.sum() - # if graspable_num < 200: - # if self.log_string is None: - # print('Warning!!! Two few graspable points! only {}'.format(graspable_num)) - # else: - # self.log_string('Warning!!! Two few graspable points! 
only {}'.format(graspable_num)) - # cur_mask_danger = cur_mask.detach().clone() - # cur_mask_danger[:800] = True - # graspable_num = cur_mask_danger.sum() - # cur_feat = seed_features_flipped[i][cur_mask_danger] - # cur_seed_xyz = seed_xyz[i][cur_mask_danger] - # # cur_inds = inds[cur_mask_danger] - # else: cur_feat = seed_features_flipped[i][cur_mask] # Ns*feat_dim cur_seed_xyz = seed_xyz[i][cur_mask] # Ns*3 @@ -89,28 +72,15 @@ def forward(self, end_points): cur_feat_flipped = cur_feat.unsqueeze(0).transpose(1, 2).contiguous() # 1*feat_dim*Ns cur_feat = gather_operation(cur_feat_flipped, fps_idxs).squeeze(0).contiguous() # feat_dim*Ns - # random sample M_points - # if graspable_num >= self.M_points: - # idxs = torch.multinomial(torch.ones(graspable_num), self.M_points, replacement=False) - # else: - # idxs1 = torch.arange(graspable_num) - # idxs2 = torch.multinomial(torch.ones(graspable_num), self.M_points - graspable_num, replacement=True) - # idxs = torch.cat([idxs1, idxs2]) - # cur_feat = cur_feat[idxs] - # cur_seed_xyz = cur_seed_xyz[idxs] - seed_features_graspable.append(cur_feat) seed_xyz_graspable.append(cur_seed_xyz) seed_xyz_graspable = torch.stack(seed_xyz_graspable, 0) # B*Ns*3 seed_features_graspable = torch.stack(seed_features_graspable) # B*feat_dim*Ns - # seed_features_graspable = seed_features_graspable.transpose(1, 2) # don't need transpose for FPS sample end_points['xyz_graspable'] = seed_xyz_graspable - # end_points['inds_graspable'] = seed_inds_graspable end_points['graspable_count_stage1'] = graspable_num_batch / B end_points, res_feat = self.rotation(seed_features_graspable, end_points) seed_features_graspable = seed_features_graspable + res_feat # residual feat from view selection - # end_points = self.rotation(seed_features_graspable, end_points) if self.is_training: end_points = process_grasp_labels(end_points) @@ -137,7 +107,7 @@ def pred_decode(end_points): grasp_angle = (grasp_score_inds // NUM_DEPTH) * np.pi / 12 grasp_depth = (grasp_score_inds % NUM_DEPTH + 1) * 0.01 grasp_depth = grasp_depth.view(-1, 1) - grasp_width = 1.2 * end_points['grasp_width_pred'][i] / 10. # grasp width gt has been multiplied by 10 + grasp_width = 1.2 * end_points['grasp_width_pred'][i] / 10. grasp_width = grasp_width.view(M_POINT, NUM_ANGLE*NUM_DEPTH) grasp_width = torch.gather(grasp_width, 1, grasp_score_inds.view(-1, 1)) grasp_width = torch.clamp(grasp_width, min=0., max=GRASP_MAX_WIDTH) diff --git a/models/loss.py b/models/loss.py index 60966b4..7f589f4 100644 --- a/models/loss.py +++ b/models/loss.py @@ -1,9 +1,5 @@ -""" Loss functions for training. 
- Author: chenxi-wang -""" - -import torch import torch.nn as nn +import torch def get_loss(end_points): @@ -24,12 +20,12 @@ def compute_objectness_loss(end_points): loss = criterion(objectness_score, objectness_label) end_points['loss/stage1_objectness_loss'] = loss - # objectness_pred = torch.argmax(objectness_score, 1) - # end_points['stage1_objectness_acc'] = (objectness_pred == objectness_label.long()).float().mean() - # end_points['stage1_objectness_prec'] = (objectness_pred == objectness_label.long())[ - # objectness_pred == 1].float().mean() - # end_points['stage1_objectness_recall'] = (objectness_pred == objectness_label.long())[ - # objectness_label == 1].float().mean() + objectness_pred = torch.argmax(objectness_score, 1) + end_points['stage1_objectness_acc'] = (objectness_pred == objectness_label.long()).float().mean() + end_points['stage1_objectness_prec'] = (objectness_pred == objectness_label.long())[ + objectness_pred == 1].float().mean() + end_points['stage1_objectness_recall'] = (objectness_pred == objectness_label.long())[ + objectness_label == 1].float().mean() return loss, end_points @@ -42,21 +38,14 @@ def compute_graspness_loss(end_points): loss = loss[loss_mask] loss = loss.mean() - # graspness_score_c = graspness_score.detach().clone()[loss_mask] - # graspness_label_c = graspness_label.detach().clone()[loss_mask] - # graspness_score_c = torch.clamp(graspness_score_c, 0., 0.99) - # graspness_label_c = torch.clamp(graspness_label_c, 0., 0.99) - # rank_error = (torch.abs(torch.trunc(graspness_score_c * 20) - torch.trunc(graspness_label_c * 20)) / 20.).mean() - # end_points['stage1_graspness_acc_rank_error'] = rank_error - - # graspness_score_c[graspness_score_c > 0.15] = 1 - # graspness_score_c[graspness_score_c <= 0.15] = 0 - # graspness_label_c[graspness_label_c > 0.15] = 1 - # graspness_label_c[graspness_label_c <= 0.15] = 0 - # end_points['graspness_acc'] = (graspness_score_c.bool() == graspness_label_c.bool()).float().mean() + graspness_score_c = graspness_score.detach().clone()[loss_mask] + graspness_label_c = graspness_label.detach().clone()[loss_mask] + graspness_score_c = torch.clamp(graspness_score_c, 0., 0.99) + graspness_label_c = torch.clamp(graspness_label_c, 0., 0.99) + rank_error = (torch.abs(torch.trunc(graspness_score_c * 20) - torch.trunc(graspness_label_c * 20)) / 20.).mean() + end_points['stage1_graspness_acc_rank_error'] = rank_error end_points['loss/stage1_graspness_loss'] = loss - return loss, end_points @@ -74,19 +63,15 @@ def compute_score_loss(end_points): grasp_score_pred = end_points['grasp_score_pred'] grasp_score_label = end_points['batch_grasp_score'] loss = criterion(grasp_score_pred, grasp_score_label) - # loss_mask = end_points['objectness_label'].bool() - # loss_mask = torch.gather(loss_mask, 1, end_points['inds_graspable']) - # loss = loss[loss_mask].mean() + end_points['loss/stage3_score_loss'] = loss - # end_points['loss/stage3_score_loss_point_num'] = loss_mask.sum() / B return loss, end_points def compute_width_loss(end_points): criterion = nn.SmoothL1Loss(reduction='none') grasp_width_pred = end_points['grasp_width_pred'] - # norm by cylinder radius(Crop module) and norm to 0~1, original with is 0~0.1, /0.05->0~2, /2->0~1 - grasp_width_label = end_points['batch_grasp_width'] * 10 # norm by cylinder radius(Crop module) + grasp_width_label = end_points['batch_grasp_width'] * 10 loss = criterion(grasp_width_pred, grasp_width_label) grasp_score_label = end_points['batch_grasp_score'] loss_mask = grasp_score_label > 0 diff --git 
a/models/modules.py b/models/modules.py index 0747c71..3be1789 100644 --- a/models/modules.py +++ b/models/modules.py @@ -21,16 +21,9 @@ class GraspableNet(nn.Module): def __init__(self, seed_feature_dim): super().__init__() self.in_dim = seed_feature_dim - # self.conv1 = nn.Conv1d(self.in_dim, 128, 1) self.conv_graspable = nn.Conv1d(self.in_dim, 3, 1) def forward(self, seed_features, end_points): - # features = F.relu(self.bn1(self.conv1(seed_features)), inplace=True) - # features = F.relu(self.bn2(self.conv2(features)), inplace=True) - # features = F.relu(self.conv1(seed_features), inplace=True) - # features = F.relu(self.conv2(features), inplace=True) - - # seed_features = F.relu(self.conv1(seed_features), inplace=True) graspable_score = self.conv_graspable(seed_features) # (B, 3, num_seed) end_points['objectness_score'] = graspable_score[:, :2] end_points['graspness_score'] = graspable_score[:, 2] @@ -39,14 +32,6 @@ def forward(self, seed_features, end_points): class ApproachNet(nn.Module): def __init__(self, num_view, seed_feature_dim, is_training=True): - """ Approach vector estimation from seed point features. - - Input: - num_view: [int] - number of views generated from each each seed point - seed_feature_dim: [int] - number of channels of seed point features - """ super().__init__() self.num_view = num_view self.in_dim = seed_feature_dim @@ -55,23 +40,12 @@ def __init__(self, num_view, seed_feature_dim, is_training=True): self.conv2 = nn.Conv1d(self.in_dim, self.num_view, 1) def forward(self, seed_features, end_points): - """ Forward pass. - - Input: - seed_features: [torch.FloatTensor, (batch_size,feature_dim,num_seed) - features of seed points - end_points: [dict] - - Output: - end_points: [dict] - """ B, _, num_seed = seed_features.size() res_features = F.relu(self.conv1(seed_features), inplace=True) features = self.conv2(res_features) view_score = features.transpose(1, 2).contiguous() # (B, num_seed, num_view) end_points['view_score'] = view_score - # print(view_score.min(), view_score.max(), view_score.mean()) if self.is_training: # normalize view graspness score to 0~1 view_score_ = view_score.clone().detach() @@ -104,20 +78,6 @@ def forward(self, seed_features, end_points): class CloudCrop(nn.Module): - """ Cylinder group and align for grasp configure estimation. Return a list of grouped points with different cropping depths. - Input: - nsample: [int] - sample number in a group - seed_feature_dim: [int] - number of channels of grouped points - cylinder_radius: [float] - radius of the cylinder space - hmin: [float] - height of the bottom surface - hmax_list: [list of float] - list of heights of the upper surface - """ - def __init__(self, nsample, seed_feature_dim, cylinder_radius=0.05, hmin=-0.02, hmax=0.04): super().__init__() self.nsample = nsample @@ -125,25 +85,11 @@ def __init__(self, nsample, seed_feature_dim, cylinder_radius=0.05, hmin=-0.02, self.cylinder_radius = cylinder_radius mlps = [3 + self.in_dim, 256, 256] # use xyz, so plus 3 - # returned group features without xyz, normalize xyz as graspness paper - # (i don't know if features with xyz will be better) self.grouper = CylinderQueryAndGroup(radius=cylinder_radius, hmin=hmin, hmax=hmax, nsample=nsample, use_xyz=True, normalize_xyz=True) self.mlps = pt_utils.SharedMLP(mlps, bn=True) def forward(self, seed_xyz_graspable, seed_features_graspable, vp_rot): - """ Forward pass. 
- Input: - seed_xyz: [torch.FloatTensor, (batch_size,num_seed,3)] - coordinates of seed points - pointcloud: [torch.FloatTensor, (batch_size,num_seed,3)] - the points to be cropped - vp_rot: [torch.FloatTensor, (batch_size,num_seed,3,3)] - rotation matrices generated from approach vectors - Output: - vp_features: [torch.FloatTensor, (batch_size,num_features,num_seed,num_depth)] - features of grouped points in different depths - """ grouped_feature = self.grouper(seed_xyz_graspable, seed_xyz_graspable, vp_rot, seed_features_graspable) # B*3 + feat_dim*M*K new_features = self.mlps(grouped_feature) # (batch_size, mlps[-1], M, K) @@ -153,15 +99,6 @@ def forward(self, seed_xyz_graspable, seed_features_graspable, vp_rot): class SWADNet(nn.Module): - """ Grasp configure estimation. - - Input: - num_angle: [int] - number of in-plane rotation angle classes - the value of the i-th class --> i*PI/num_angle (i=0,...,num_angle-1) - num_depth: [int] - number of gripper depth classes - """ def __init__(self, num_angle, num_depth): super().__init__() self.num_angle = num_angle @@ -171,16 +108,6 @@ def __init__(self, num_angle, num_depth): self.conv_swad = nn.Conv1d(256, 2*num_angle*num_depth, 1) def forward(self, vp_features, end_points): - """ Forward pass. - - Input: - vp_features: [torch.FloatTensor, (batch_size,num_seed,3)] - features of grouped points in different depths - end_points: [dict] - - Output: - end_points: [dict] - """ B, _, num_seed = vp_features.size() vp_features = F.relu(self.conv1(vp_features), inplace=True) vp_features = self.conv_swad(vp_features) diff --git a/models/resnet.py b/models/resnet.py index c5acb98..6e57827 100644 --- a/models/resnet.py +++ b/models/resnet.py @@ -1,9 +1,4 @@ -import os -import numpy as np - -import torch import torch.nn as nn -from torch.optim import SGD try: import open3d as o3d diff --git a/requirements.txt b/requirements.txt index 1a8e892..2b9704a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ scipy open3d>=0.8 Pillow tqdm +MinkowskiEngine==0.5.4 \ No newline at end of file diff --git a/test.py b/test.py index e16d1ec..11b7f05 100644 --- a/test.py +++ b/test.py @@ -5,8 +5,7 @@ import time import torch from torch.utils.data import DataLoader -# from graspnetAPI.graspnet_eval import GraspGroup, GraspNetEval -from graspnetAPI.graspnet_eval_zibo import GraspGroup, GraspNetEval # change the library code to save eval results +from graspnetAPI.graspnet_eval import GraspGroup, GraspNetEval ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.join(ROOT_DIR, 'pointnet2')) @@ -16,14 +15,12 @@ from models.graspnet import GraspNet, pred_decode from dataset.graspnet_dataset import GraspNetDataset, minkowski_collate_fn -# from utils.collision_detector import ModelFreeCollisionDetector +from utils.collision_detector import ModelFreeCollisionDetector parser = argparse.ArgumentParser() -parser.add_argument('--dataset_root', default='/media/bot/980A6F5E0A6F38801/datasets/graspnet') -parser.add_argument('--checkpoint_path', help='Model checkpoint path', - default='/data/zibo/logs/log_kn_v7/np15000_dim512_graspness1e-1_M1024_bs2_lr5e-4_viewres_dataaug_fps_epoch05.tar') -parser.add_argument('--dump_dir', help='Dump dir to save outputs', - default='/data/zibo/logs/log_kn_v7/dump_epoch05') +parser.add_argument('--dataset_root', default=None, required=True) +parser.add_argument('--checkpoint_path', help='Model checkpoint path', default=None, required=True) +parser.add_argument('--dump_dir', help='Dump dir to save outputs', 
default=None, required=True) parser.add_argument('--seed_feat_dim', default=512, type=int, help='Point wise feature dim') parser.add_argument('--camera', default='kinect', help='Camera split [realsense/kinect]') parser.add_argument('--num_point', type=int, default=15000, help='Point Number [default: 15000]') @@ -48,7 +45,7 @@ def my_worker_init_fn(worker_id): def inference(): - test_dataset = GraspNetDataset(cfgs.dataset_root, split='mini_test', camera=cfgs.camera, num_points=cfgs.num_point, + test_dataset = GraspNetDataset(cfgs.dataset_root, split='test_seen', camera=cfgs.camera, num_points=cfgs.num_point, voxel_size=cfgs.voxel_size, remove_outlier=True, augment=False, load_label=False) print('Test dataset length: ', len(test_dataset)) scene_list = test_dataset.scene_list() @@ -90,11 +87,11 @@ def inference(): gg = GraspGroup(preds) # collision detection - # if cfgs.collision_thresh > 0: - # cloud, _ = test.get_data(data_idx, return_raw_cloud=True) - # mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs.voxel_size) - # collision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=cfgs.collision_thresh) - # gg = gg[~collision_mask] + if cfgs.collision_thresh > 0: + cloud = test_dataset.get_data(data_idx, return_raw_cloud=True) + mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs.voxel_size_cd) + collision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=cfgs.collision_thresh) + gg = gg[~collision_mask] # save grasps save_dir = os.path.join(cfgs.dump_dir, scene_list[data_idx], cfgs.camera) @@ -109,16 +106,15 @@ def inference(): tic = time.time() -def evaluate(dump_dir): # changed the graspnetAPI code, it can directly save evaluating results +def evaluate(dump_dir): ge = GraspNetEval(root=cfgs.dataset_root, camera=cfgs.camera, split='test_seen') - # ge.eval_seen(dump_folder=dump_dir, proc=6) - ge.eval_scene(scene_id=100, dump_folder=dump_dir) + res, ap = ge.eval_seen(dump_folder=dump_dir, proc=6) + save_dir = os.path.join(cfgs.dump_dir, 'ap_{}.npy'.format(cfgs.camera)) + np.save(save_dir, res) if __name__ == '__main__': - # python test.py --infer --eval if cfgs.infer: inference() - # python test.py --dump_dir logs/dump_kn_decouple_quat/ if cfgs.eval: evaluate(cfgs.dump_dir) diff --git a/train.py b/train.py index 011a124..1755c1d 100644 --- a/train.py +++ b/train.py @@ -20,13 +20,11 @@ from dataset.graspnet_dataset import GraspNetDataset, minkowski_collate_fn, load_grasp_labels parser = argparse.ArgumentParser() -parser.add_argument('--dataset_root', default='/media/bot/980A6F5E0A6F38801/datasets/graspnet') +parser.add_argument('--dataset_root', default=None, required=True) parser.add_argument('--camera', default='kinect', help='Camera split [realsense/kinect]') -parser.add_argument('--checkpoint_path', help='Model checkpoint path', - default='/data/zibo/logs/log_kn_v8/np15000_graspness1e-1_bs2_lr7e-4_viewres_dataaug_fps_14D_epoch03.tar') -parser.add_argument('--model_name', type=str, - default='np15000_graspness1e-1_bs2_lr7e-4_viewres_dataaug_fps_14D') -parser.add_argument('--log_dir', default='/data/zibo/logs/log_kn_v8') +parser.add_argument('--checkpoint_path', help='Model checkpoint path', default=None) +parser.add_argument('--model_name', type=str, default=None) +parser.add_argument('--log_dir', default='logs/log') parser.add_argument('--num_point', type=int, default=15000, help='Point Number [default: 20000]') parser.add_argument('--seed_feat_dim', default=512, type=int, help='Point wise feature dim') 
parser.add_argument('--voxel_size', type=float, default=0.005, help='Voxel Size to process point clouds ') @@ -35,7 +33,6 @@ parser.add_argument('--learning_rate', type=float, default=0.0007, help='Initial learning rate [default: 0.001]') parser.add_argument('--resume', default=0, type=int, help='Whether to resume from checkpoint') cfgs = parser.parse_args() - # ------------------------------------------------------------------------- GLOBAL CONFIG BEG EPOCH_CNT = 0 CHECKPOINT_PATH = cfgs.checkpoint_path if cfgs.checkpoint_path is not None and cfgs.resume else None @@ -59,10 +56,9 @@ def my_worker_init_fn(worker_id): grasp_labels = load_grasp_labels(cfgs.dataset_root) -# grasp_labels=None TRAIN_DATASET = GraspNetDataset(cfgs.dataset_root, grasp_labels=grasp_labels, camera=cfgs.camera, split='train', num_points=cfgs.num_point, voxel_size=cfgs.voxel_size, - remove_outlier=True, augment=True, remove_invisible=False, load_label=True) + remove_outlier=True, augment=True, load_label=True) print('train dataset length: ', len(TRAIN_DATASET)) TRAIN_DATALOADER = DataLoader(TRAIN_DATASET, batch_size=cfgs.batch_size, shuffle=True, num_workers=0, worker_init_fn=my_worker_init_fn, collate_fn=minkowski_collate_fn) diff --git a/utils/label_generation.py b/utils/label_generation.py index 865cf45..508c638 100644 --- a/utils/label_generation.py +++ b/utils/label_generation.py @@ -22,7 +22,6 @@ def process_grasp_labels(end_points): batch_size, num_samples, _ = seed_xyzs.size() batch_grasp_points = [] - # batch_grasp_views = [] batch_grasp_views_rot = [] batch_grasp_scores = [] batch_grasp_widths = [] @@ -32,7 +31,6 @@ def process_grasp_labels(end_points): # get merged grasp points for label computation grasp_points_merged = [] - # grasp_views_merged = [] grasp_views_rot_merged = [] grasp_scores_merged = [] grasp_widths_merged = [] @@ -55,8 +53,6 @@ def process_grasp_labels(end_points): grasp_views_ = grasp_views.transpose(0, 1).contiguous().unsqueeze(0) grasp_views_trans_ = grasp_views_trans.transpose(0, 1).contiguous().unsqueeze(0) view_inds = knn(grasp_views_trans_, grasp_views_, k=1).squeeze() - 1 - # grasp_views_trans = torch.index_select(grasp_views_trans, 0, view_inds) # (V, 3) - # grasp_views_trans = grasp_views_trans.unsqueeze(0).expand(num_grasp_points, -1, -1) # (Np, V, 3) grasp_views_rot_trans = torch.index_select(grasp_views_rot_trans, 0, view_inds) # (V, 3, 3) grasp_views_rot_trans = grasp_views_rot_trans.unsqueeze(0).expand(num_grasp_points, -1, -1, -1) # (Np, V, 3, 3) @@ -64,13 +60,11 @@ def process_grasp_labels(end_points): grasp_widths = torch.index_select(grasp_widths, 1, view_inds) # (Np, V, A, D) # add to list grasp_points_merged.append(grasp_points_trans) - # grasp_views_merged.append(grasp_views_trans) grasp_views_rot_merged.append(grasp_views_rot_trans) grasp_scores_merged.append(grasp_scores) grasp_widths_merged.append(grasp_widths) grasp_points_merged = torch.cat(grasp_points_merged, dim=0) # (Np', 3) - # grasp_views_merged = torch.cat(grasp_views_merged, dim=0) # (Np', V, 3) grasp_views_rot_merged = torch.cat(grasp_views_rot_merged, dim=0) # (Np', V, 3, 3) grasp_scores_merged = torch.cat(grasp_scores_merged, dim=0) # (Np', V, A, D) grasp_widths_merged = torch.cat(grasp_widths_merged, dim=0) # (Np', V, A, D) @@ -82,20 +76,17 @@ def process_grasp_labels(end_points): # assign anchor points to real points grasp_points_merged = torch.index_select(grasp_points_merged, 0, nn_inds) # (Ns, 3) - # grasp_views_merged = torch.index_select(grasp_views_merged, 0, nn_inds) # (Ns, V, 3) 
grasp_views_rot_merged = torch.index_select(grasp_views_rot_merged, 0, nn_inds) # (Ns, V, 3, 3) grasp_scores_merged = torch.index_select(grasp_scores_merged, 0, nn_inds) # (Ns, V, A, D) grasp_widths_merged = torch.index_select(grasp_widths_merged, 0, nn_inds) # (Ns, V, A, D) # add to batch batch_grasp_points.append(grasp_points_merged) - # batch_grasp_views.append(grasp_views_merged) batch_grasp_views_rot.append(grasp_views_rot_merged) batch_grasp_scores.append(grasp_scores_merged) batch_grasp_widths.append(grasp_widths_merged) batch_grasp_points = torch.stack(batch_grasp_points, 0) # (B, Ns, 3) - # batch_grasp_views = torch.stack(batch_grasp_views, 0) # (B, Ns, V, 3) batch_grasp_views_rot = torch.stack(batch_grasp_views_rot, 0) # (B, Ns, V, 3, 3) batch_grasp_scores = torch.stack(batch_grasp_scores, 0) # (B, Ns, V, A, D) batch_grasp_widths = torch.stack(batch_grasp_widths, 0) # (B, Ns, V, A, D) @@ -111,13 +102,9 @@ def process_grasp_labels(end_points): view_graspness_max = view_graspness_max.unsqueeze(-1).expand(-1, -1, 300) # (B, Ns, V) view_graspness_min = view_graspness_min.unsqueeze(-1).expand(-1, -1, 300) # same shape as batch_grasp_view_graspness batch_grasp_view_graspness = (batch_grasp_view_graspness - view_graspness_min) / (view_graspness_max - view_graspness_min + 1e-5) - # batch_grasp_view_graspness = torch.clamp(batch_grasp_view_graspness, 0., 1.) # process scores label_mask = (batch_grasp_scores > 0) & (batch_grasp_widths <= GRASP_MAX_WIDTH) # (B, Ns, V, A, D) - # u_max = batch_grasp_scores.max() - # u_min = batch_grasp_scores[label_mask].min() - # batch_grasp_scores[label_mask] = torch.log(u_max / batch_grasp_scores[label_mask]) / torch.log(u_max / u_min) batch_grasp_scores[~label_mask] = 0 end_points['batch_grasp_point'] = batch_grasp_points diff --git a/vis_and_analysis.py b/vis_and_analysis.py deleted file mode 100644 index da49baa..0000000 --- a/vis_and_analysis.py +++ /dev/null @@ -1,44 +0,0 @@ -import numpy as np -from graspnetAPI import GraspGroup -import open3d as o3d -import scipy.io as scio -from PIL import Image -import os -from utils.data_utils import CameraInfo, create_point_cloud_from_depth_image, get_workspace_mask - -data_path = '/media/bot/980A6F5E0A6F38801/datasets/graspnet' -dump_dir = '/data/zibo/logs/log_kn_v1/dump_epoch05_fps/' -scene_id = 'scene_0101' -ann_id = '0000' - -poses = np.load(os.path.join(dump_dir, scene_id, 'kinect', ann_id + '.npy')) - -# vis -color = np.array(Image.open(os.path.join(data_path, 'scenes', scene_id, 'kinect', 'rgb', ann_id + '.png')), dtype=np.float32) / 255.0 -depth = np.array(Image.open(os.path.join(data_path, 'scenes', scene_id, 'kinect', 'depth', ann_id + '.png'))) -seg = np.array(Image.open(os.path.join(data_path, 'scenes', scene_id, 'kinect', 'label', ann_id + '.png'))) -meta = scio.loadmat(os.path.join(data_path, 'scenes', scene_id, 'kinect', 'meta', ann_id + '.mat')) -intrinsic = meta['intrinsic_matrix'] -factor_depth = meta['factor_depth'] -camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2], factor_depth) -point_cloud = create_point_cloud_from_depth_image(depth, camera, organized=True) -depth_mask = (depth > 0) -camera_poses = np.load(os.path.join(data_path, 'scenes', scene_id, 'kinect', 'camera_poses.npy')) -align_mat = np.load(os.path.join(data_path, 'scenes', scene_id, 'kinect', 'cam0_wrt_table.npy')) -trans = np.dot(align_mat, camera_poses[int(ann_id)]) -workspace_mask = get_workspace_mask(point_cloud, seg, trans=trans, organized=True, outlier=0.02) -mask = 
(depth_mask & workspace_mask) -point_cloud = point_cloud[mask] -color = color[mask] - -cloud = o3d.geometry.PointCloud() -cloud.points = o3d.utility.Vector3dVector(point_cloud.reshape(-1, 3).astype(np.float32)) -cloud.colors = o3d.utility.Vector3dVector(color.reshape(-1, 3).astype(np.float32)) -gg = GraspGroup(poses) -gg = gg.nms() -# print(gg.grasp_group_array.shape) -gg = gg.sort_by_score() -if gg.__len__() > 30: - gg = gg[:30] -grippers = gg.to_open3d_geometry_list() -o3d.visualization.draw_geometries([cloud, *grippers])