From 196fb5f1cbac5ea141d15171b0de5a85699a3b0f Mon Sep 17 00:00:00 2001
From: rhett-chen <2206571319@qq.com>
Date: Tue, 7 Dec 2021 21:20:28 +0800
Subject: [PATCH] release
---
README.md | 82 ++++++++++++++++++++
ap_in_one_image.py | 136 ----------------------------------
command_test.sh | 1 +
command_train.sh | 2 +-
compute_ap.py | 66 -----------------
dataset/generate_graspness.py | 13 +++-
dataset/graspnet_dataset.py | 27 +------
dataset/simplify_dataset.py | 10 ++-
demo.py | 124 -------------------------------
models/backbone.py | 131 --------------------------------
models/graspnet.py | 34 +--------
models/loss.py | 45 ++++-------
models/modules.py | 73 ------------------
models/resnet.py | 5 --
requirements.txt | 1 +
test.py | 34 ++++-----
train.py | 14 ++--
utils/label_generation.py | 13 ----
vis_and_analysis.py | 44 -----------
19 files changed, 143 insertions(+), 712 deletions(-)
create mode 100644 README.md
delete mode 100644 ap_in_one_image.py
create mode 100644 command_test.sh
delete mode 100644 compute_ap.py
delete mode 100644 demo.py
delete mode 100644 models/backbone.py
delete mode 100644 vis_and_analysis.py
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a7d9289
--- /dev/null
+++ b/README.md
@@ -0,0 +1,82 @@
+# GraspNet graspness
+My implementation of the paper "Graspness Discovery in Clutters for Fast and Accurate Grasp Detection" (ICCV 2021).
+
+[[paper](https://openaccess.thecvf.com/content/ICCV2021/papers/Wang_Graspness_Discovery_in_Clutters_for_Fast_and_Accurate_Grasp_Detection_ICCV_2021_paper.pdf)]
+[[dataset](https://graspnet.net/)]
+[[API](https://github.com/graspnet/graspnetAPI)]
+
+
+## Requirements
+- Python 3
+- PyTorch 1.6
+- Open3D 0.8
+- TensorBoard 2.3
+- NumPy
+- SciPy
+- Pillow
+- tqdm
+- MinkowskiEngine
+
+## Installation
+Get the code.
+```bash
+git clone https://github.com/rhett-chen/graspness_implementation.git
+cd graspness_implementation
+```
+Install packages via Pip.
+```bash
+pip install -r requirements.txt
+```
+Compile and install pointnet2 operators (code adapted from [votenet](https://github.com/facebookresearch/votenet)).
+```bash
+cd pointnet2
+python setup.py install
+```
+Compile and install knn operator (code adapted from [pytorch_knn_cuda](https://github.com/chrischoy/pytorch_knn_cuda)).
+```bash
+cd knn
+python setup.py install
+```
+Install graspnetAPI for evaluation.
+```bash
+git clone https://github.com/graspnet/graspnetAPI.git
+cd graspnetAPI
+pip install .
+```
+
+## Point-level Graspness Generation
+Point-level graspness labels are not included in the original dataset and need to be generated separately. Make sure you have downloaded the original dataset from [GraspNet](https://graspnet.net/). The generation code is in [dataset/generate_graspness.py](dataset/generate_graspness.py).
+```bash
+cd dataset
+python generate_graspness.py --dataset_root /data3/graspnet --camera_type kinect
+```
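+
+For reference, the point-level graspness label is the fraction of a point's candidate grasps (over all views, in-plane angles and depths) that are collision-free and have a friction coefficient below the threshold (0.6 by default in the script). Below is a minimal sketch of that computation, assuming label arrays shaped like those handled in `generate_graspness.py` (`scores` and `collision` of shape `(Np, V, A, D)`, with non-positive scores marking invalid grasps):
+```python
+import numpy as np
+
+def point_graspness(scores, collision, fric_coef_thresh=0.6):
+    # scores: (Np, V, A, D) friction coefficients, <= 0 means invalid
+    # collision: (Np, V, A, D) boolean collision mask
+    valid = (scores <= fric_coef_thresh) & (scores > 0) & (~collision)
+    # average over the V * A * D grasp candidates of each point
+    return valid.reshape(valid.shape[0], -1).mean(axis=1).astype(np.float32)
+```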
+
+## Simplify Dataset
+The original `grasp_label` files contain redundant data; simplifying them significantly reduces memory cost. The code is in [dataset/simplify_dataset.py](dataset/simplify_dataset.py).
+```bash
+cd dataset
+python simplify_dataset.py --dataset_root /data3/graspnet
+```
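+
+After simplification, the labels are saved to `grasp_label_simplified` under the dataset root. A hedged sketch of loading one simplified label file follows; the `000_labels.npz` file name and the `points`/`width`/`scores` keys are assumptions based on how the dataset loader reads them, so check `dataset/graspnet_dataset.py` for the exact format:
+```python
+import numpy as np
+
+# hypothetical path for object 000; one .npz file per object model
+label = np.load('/data3/graspnet/grasp_label_simplified/000_labels.npz')
+points = label['points']   # candidate grasp points on the object model
+widths = label['width']    # gripper widths for each (point, view, angle, depth)
+scores = label['scores']   # friction coefficients, <= 0 means invalid
+print(points.shape, widths.shape, scores.shape)
+```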
+
+## Training and Testing
+Training examples are shown in [command_train.sh](command_train.sh). `--dataset_root`, `--camera` and `--log_dir` should be specified according to your settings. You can use TensorBoard (e.g., `tensorboard --logdir <your log_dir>`) to visualize the training process.
+
+Testing examples are shown in [command_test.sh](command_test.sh), which contains inference and result evaluation. `--dataset_root`, `--camera`, `--checkpoint_path` and `--dump_dir` should be specified according to your settings. Set `--collision_thresh` to -1 for fast inference.
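+
+During inference, per-frame predictions are dumped as `.npy` grasp arrays under `<dump_dir>/scene_xxxx/<camera>/`. A small sketch for inspecting one of them with graspnetAPI and Open3D (the paths are placeholders; adjust them to your own `--dump_dir`, scene and frame):
+```python
+import numpy as np
+import open3d as o3d
+from graspnetAPI import GraspGroup
+
+# hypothetical dump path: dump_dir / scene / camera / frame index
+preds = np.load('logs/log_kn/dump_epoch03/scene_0100/kinect/0000.npy')
+gg = GraspGroup(preds)
+gg = gg.nms().sort_by_score()[:50]           # keep the 50 best non-overlapping grasps
+grippers = gg.to_open3d_geometry_list()      # gripper meshes for visualization
+o3d.visualization.draw_geometries(grippers)  # add the scene point cloud as well if available
+```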
+
+If you need the trained weights, you can contact me directly.
+
+## Results
+Results "In repo" report the model performance of my results without collision detection.
+
+Evaluation results on Kinect camera:
+| | | Seen | | | Similar | | | Novel | |
+|:--------:|:------:|:----------------:|:----------------:|:------:|:----------------:|:----------------:|:------:|:----------------:|:----------------:|
+| | __AP__ | AP0.8 | AP0.4 | __AP__ | AP0.8 | AP0.4 | __AP__ | AP0.8 | AP0.4 |
+| In paper | 61.19 | 71.46 | 56.04 | 47.39 | 56.78 | 40.43 | 19.01 | 23.73 | 10.60 |
+| In repo | 61.83 | 73.28 | 54.14 | 51.13 | 62.53 | 41.57 | 19.94 | 24.90 | 11.02 |
+
+## Troubleshooting
+If you hit a `torch.floor` error inside MinkowskiEngine, a simple workaround is to edit the MinkowskiEngine source: in `MinkowskiEngine/utils/quantization.py` (around line 262), change `discrete_coordinates = _auto_floor(coordinates)` to `discrete_coordinates = coordinates`.
+
+## Acknowledgement
+My code is mainly based on [graspnet-baseline](https://github.com/graspnet/graspnet-baseline).
diff --git a/ap_in_one_image.py b/ap_in_one_image.py
deleted file mode 100644
index be9c6c2..0000000
--- a/ap_in_one_image.py
+++ /dev/null
@@ -1,136 +0,0 @@
-import numpy as np
-from graspnetAPI import GraspGroup
-import os
-import pickle
-import open3d as o3d
-from graspnetAPI.utils.config import get_config
-from graspnetAPI.utils.xmlhandler import xmlReader
-from graspnetAPI.utils.eval_utils import create_table_points, parse_posevector, transform_points, \
- eval_grasp, load_dexnet_model, voxel_sample_points
-import sys
-
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-ROOT_DIR = os.path.dirname(BASE_DIR)
-sys.path.append(ROOT_DIR)
-
-
-def get_config_rgbmatter():
- '''
- - return the config dict
- '''
- config = dict()
- config['scene_id'] = 100
- config['index_str'] = '0000'
- config['camera'] = 'kinect'
- config['collision_detection_choice'] = 'point_cloud' # point_cloud or full_model
-
- # config['dataset_path'] = "/data3/graspnet/"
- config['dataset_path'] = '/media/bot/980A6F5E0A6F38801/datasets/graspnet'
-
- # config['res_6D_pose_path'] = '/home/zibo/cvpr2022/graspnet-cvpr/logs/' \
- # 'log_kn_graspness_minkowski_v0/dump_epoch04_fps'
- config['res_6D_pose_path'] = '/data/zibo/logs/log_kn_v1/dump_epoch06_fps/'
- config['scene_id_str'] = 'scene_' + str(config['scene_id']).zfill(4)
- return config
-
-
-my_config = get_config_rgbmatter()
-
-res_6d_path = os.path.join(my_config['res_6D_pose_path'], my_config['scene_id_str'], my_config['camera'],
- my_config['index_str'] + '.npy')
-# res_6d_path = os.path.join(my_config['grasp_gt_path'], my_config['scene_id_str'], my_config['camera'],
-# my_config['index_str'] + '.npy')
-# res_6d_path = my_config['res_6D_pose_path']
-grasp = np.load(res_6d_path)
-print('\ntesting {}, scene_{} image_{}'.format(res_6d_path, my_config['scene_id'], my_config['index_str']))
-
-grasp_group = GraspGroup(grasp)
-print('grasp shape: ', grasp_group.grasp_group_array.shape)
-camera_pose = np.load(os.path.join(my_config['dataset_path'], 'scenes', my_config['scene_id_str'], my_config['camera'],
- 'camera_poses.npy'))[int(my_config['index_str'])]
-align_mat = np.load(os.path.join(my_config['dataset_path'], 'scenes', my_config['scene_id_str'],
- my_config['camera'], 'cam0_wrt_table.npy'))
-scene_reader = xmlReader(os.path.join(my_config['dataset_path'], 'scenes', my_config['scene_id_str'],
- my_config['camera'], 'annotations', my_config['index_str'] + '.xml'))
-
-config = get_config()
-TOP_K = 50
-list_coe_of_friction = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2]
-max_width = 0.1
-model_dir = os.path.join(my_config['dataset_path'], 'models')
-
-posevectors = scene_reader.getposevectorlist()
-obj_list = []
-pose_list = []
-for posevector in posevectors:
- obj_idx, mat = parse_posevector(posevector)
- obj_list.append(obj_idx)
- pose_list.append(mat)
-table = create_table_points(1.0, 1.0, 0.05, dx=-0.5, dy=-0.5, dz=-0.05, grid_size=0.008)
-table_trans = transform_points(table, np.linalg.inv(np.matmul(align_mat, camera_pose)))
-
-gg_array = grasp_group.grasp_group_array
-min_width_mask = (gg_array[:, 1] < 0)
-max_width_mask = (gg_array[:, 1] > max_width)
-gg_array[min_width_mask, 1] = 0
-gg_array[max_width_mask, 1] = max_width
-grasp_group.grasp_group_array = gg_array
-
-obj_list = []
-model_list = []
-dexmodel_list = []
-for posevector in posevectors:
- obj_idx, _ = parse_posevector(posevector)
- obj_list.append(obj_idx)
-for obj_idx in obj_list:
- model = o3d.io.read_point_cloud(os.path.join(model_dir, '%03d' % obj_idx, 'nontextured.ply'))
- dex_cache_path = os.path.join(my_config['dataset_path'], "dex_models", '%03d.pkl' % obj_idx)
- if os.path.exists(dex_cache_path): # don't know why in 203, pickle.load() will throw an error, so add not
- with open(dex_cache_path, 'rb') as f:
- dexmodel = pickle.load(f)
- else:
- dexmodel = load_dexnet_model(os.path.join(model_dir, '%03d' % obj_idx, 'textured'))
- points = np.array(model.points)
- model_list.append(points)
- dexmodel_list.append(dexmodel)
-
-model_sampled_list = list()
-for model in model_list:
- model_sampled = voxel_sample_points(model, 0.008)
- model_sampled_list.append(model_sampled)
-
-grasp_list, score_list, collision_mask_list = eval_grasp(grasp_group, model_sampled_list, dexmodel_list, pose_list,
- config, table=table_trans, voxel_size=0.008, TOP_K=TOP_K)
-# remove empty
-grasp_list = [x for x in grasp_list if len(x) != 0]
-score_list = [x for x in score_list if len(x) != 0]
-collision_mask_list = [x for x in collision_mask_list if len(x) != 0]
-
-if len(grasp_list) == 0:
- grasp_accuracy = np.zeros((TOP_K, len(list_coe_of_friction)))
- print('\rMean Accuracy {}'.format(np.mean(grasp_accuracy[:, :])))
-
-# concat into scene level
-grasp_list, score_list, collision_mask_list = np.concatenate(grasp_list), np.concatenate(
- score_list), np.concatenate(collision_mask_list)
-# sort in scene level
-print('final grasp shape that used to compute ap: ', grasp_list.shape)
-grasp_confidence = grasp_list[:, 0]
-indices = np.argsort(-grasp_confidence)
-grasp_list, score_list, collision_mask_list = grasp_list[indices], score_list[indices], collision_mask_list[
- indices]
-
-# calculate AP
-grasp_accuracy = np.zeros((TOP_K, len(list_coe_of_friction)))
-print(score_list)
-# score_list = np.array([i for i in score_list if abs(i + 3) > 1.001])
-for fric_idx, fric in enumerate(list_coe_of_friction):
- for k in range(0, TOP_K):
- if k + 1 > len(score_list):
- grasp_accuracy[k, fric_idx] = np.sum(((score_list <= fric) & (score_list > 0)).astype(int)) / (
- k + 1)
- else:
- grasp_accuracy[k, fric_idx] = np.sum(
- ((score_list[0:k + 1] <= fric) & (score_list[0:k + 1] > 0)).astype(int)) / (k + 1)
-# print(grasp_accuracy)
-print('\nMean Accuracy: %.3f' % (100.0 * np.mean(grasp_accuracy[:, :])))
diff --git a/command_test.sh b/command_test.sh
new file mode 100644
index 0000000..1126d7a
--- /dev/null
+++ b/command_test.sh
@@ -0,0 +1 @@
+CUDA_VISIBLE_DEVICES=4 python test.py --camera kinect --dump_dir logs/log_kn/dump_epoch03 --checkpoint_path logs/log_kn/epoch03.tar --batch_size 1 --dataset_root /data3/graspnet --infer --eval
\ No newline at end of file
diff --git a/command_train.sh b/command_train.sh
index d139e64..6b5c623 100644
--- a/command_train.sh
+++ b/command_train.sh
@@ -1 +1 @@
-CUDA_VISIBLE_DEVICES=4 python train.py --log_dir logs/log_kn_v9 --batch_size 4 --learning_rate 0.001 --model_name np15000_graspness1e-1_bs4_lr1e-3_viewres_dataaug_fps_14C --dataset_root /data3/graspnet
\ No newline at end of file
+CUDA_VISIBLE_DEVICES=4 python train.py --camera kinect --log_dir logs/log_kn --batch_size 4 --learning_rate 0.001 --model_name minkuresnet --dataset_root /data3/graspnet
\ No newline at end of file
diff --git a/compute_ap.py b/compute_ap.py
deleted file mode 100644
index 8fed7f3..0000000
--- a/compute_ap.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import numpy as np
-import os
-
-acc_all = []
-# ap_scenes_path = '/data/zibo/cvpr22/logs/log_kn_graspnessrevised_pointnet2_v15_cs/' \
-# 'dump_epoch06_fps/ap_scenes'
-ap_scenes_path = '/home/zibo/graspness/logs/log_kn_v9/dump_epoch06/ap_scenes'
-print('For ', ap_scenes_path)
-for index in range(100, 130):
- acc_scene = []
- for i in range(0, 256):
- path = os.path.join(ap_scenes_path, 'scene_' + str(index).zfill(4), str(i).zfill(4) + '.npy')
- acc_c = np.load(path)
- acc_scene.append(acc_c)
- acc_all.append(acc_scene)
-
-acc_all = np.array(acc_all) * 100.
-# 90 scenes * 256 images * 50 top_k * 6 len(list_coe_of_friction = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2])
-print('acc shape: ', acc_all.shape)
-ap_all = np.mean(acc_all)
-# ap_seen_all = np.mean(acc_all[0:30])
-
-# ap_unseen_all = np.mean(acc_all[30:60])
-# ap_novel_all = np.mean(acc_all[60:90])
-print('AP: ', ap_all)
-# print('AP Seen: ', ap_seen_all)
-# print('AP Unseen: ', ap_unseen_all)
-# print('AP Novel: ', ap_novel_all)
-
-ap_all_2 = np.mean(acc_all[:, :, :, 0])
-ap_all_4 = np.mean(acc_all[:, :, :, 1])
-ap_all_8 = np.mean(acc_all[:, :, :, 3])
-ap_all_10 = np.mean(acc_all[:, :, :, 4])
-ap_all_12 = np.mean(acc_all[:, :, :, 5])
-
-#
-# ap_seen_2 = np.mean(acc_all[0:30, :, :, 0])
-# ap_seen_4 = np.mean(acc_all[0:30, :, :, 1])
-# ap_seen_8 = np.mean(acc_all[0:30, :, :, 3])
-#
-# ap_unseen_2 = np.mean(acc_all[30:60, :, :, 0])
-# ap_unseen_4 = np.mean(acc_all[30:60, :, :, 1])
-# ap_unseen_8 = np.mean(acc_all[30:60, :, :, 3])
-#
-#
-# ap_novel_2 = np.mean(acc_all[60:90, :, :, 0])
-# ap_novel_4 = np.mean(acc_all[60:90, :, :, 1])
-# ap_novel_8 = np.mean(acc_all[60:90, :, :, 3 ])
-#
-print('\nAP all 0.2: ', ap_all_2)
-print('AP all 0.4: ', ap_all_4)
-print('AP all 0.8: ', ap_all_8)
-print('AP all 1.0: ', ap_all_10)
-print('AP all 1.2: ', ap_all_12)
-#
-# print('\nAP Seen 0.2: ', ap_seen_2)
-# print('AP Seen 0.4: ', ap_seen_4)
-# print('AP Seen 0.8: ', ap_seen_8)
-#
-# print('\nAP Unseen 0.2: ', ap_unseen_2)
-# print('AP Unseen 0.4: ', ap_unseen_4)
-# print('AP Unseen 0.8: ', ap_unseen_8)
-#
-# print('\nAP Novel 0.2: ', ap_novel_2)
-# print('AP Novel 0.4: ', ap_novel_4)
-# print('AP Novel 0.8: ', ap_novel_8)
diff --git a/dataset/generate_graspness.py b/dataset/generate_graspness.py
index a546bb3..f75b480 100644
--- a/dataset/generate_graspness.py
+++ b/dataset/generate_graspness.py
@@ -10,17 +10,24 @@
import torch
from graspnetAPI.utils.xmlhandler import xmlReader
from graspnetAPI.utils.utils import get_obj_pose_list, transform_points
+import argparse
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--dataset_root', default=None, required=True)
+parser.add_argument('--camera_type', default='kinect', help='Camera split [realsense/kinect]')
if __name__ == '__main__':
- dataset_root = '/media/bot/980A6F5E0A6F38801/datasets/graspnet/'
- camera_type = 'realsense'
+ cfgs = parser.parse_args()
+ dataset_root = cfgs.dataset_root # set dataset root
+ camera_type = cfgs.camera_type # kinect / realsense
save_path_root = os.path.join(dataset_root, 'graspness')
num_views, num_angles, num_depths = 300, 12, 4
fric_coef_thresh = 0.6
point_grasp_num = num_views * num_angles * num_depths
- for scene_id in range(101):
+ for scene_id in range(100):
for ann_id in range(256):
# get scene point cloud
print('generating scene: {} ann: {}'.format(scene_id, ann_id))
diff --git a/dataset/graspnet_dataset.py b/dataset/graspnet_dataset.py
index 0fb9c45..67f4bf6 100644
--- a/dataset/graspnet_dataset.py
+++ b/dataset/graspnet_dataset.py
@@ -18,14 +18,13 @@
class GraspNetDataset(Dataset):
def __init__(self, root, grasp_labels=None, camera='kinect', split='train', num_points=20000,
- voxel_size=0.005, remove_outlier=True, remove_invisible=False, augment=False, load_label=True):
+ voxel_size=0.005, remove_outlier=True, augment=False, load_label=True):
assert (num_points <= 50000)
self.root = root
self.split = split
self.voxel_size = voxel_size
self.num_points = num_points
self.remove_outlier = remove_outlier
- self.remove_invisible = remove_invisible
self.grasp_labels = grasp_labels
self.camera = camera
self.augment = augment
@@ -42,8 +41,6 @@ def __init__(self, root, grasp_labels=None, camera='kinect', split='train', num_
self.sceneIds = list(range(130, 160))
elif split == 'test_novel':
self.sceneIds = list(range(160, 190))
- elif split == 'mini_test':
- self.sceneIds = list(range(100, 101))
self.sceneIds = ['scene_{}'.format(str(x).zfill(4)) for x in self.sceneIds]
self.depthpath = []
@@ -100,8 +97,7 @@ def __getitem__(self, index):
else:
return self.get_data(index)
- def get_data(self, index):
- # color = np.array(Image.open(self.colorpath[index]), dtype=np.float32) / 255.0
+ def get_data(self, index, return_raw_cloud=False):
depth = np.array(Image.open(self.depthpath[index]))
seg = np.array(Image.open(self.labelpath[index]))
meta = scio.loadmat(self.metapath[index])
@@ -120,7 +116,6 @@ def get_data(self, index):
# get valid points
depth_mask = (depth > 0)
- # seg_mask = (seg > 0)
if self.remove_outlier:
camera_poses = np.load(os.path.join(self.root, 'scenes', scene, self.camera, 'camera_poses.npy'))
align_mat = np.load(os.path.join(self.root, 'scenes', scene, self.camera, 'cam0_wrt_table.npy'))
@@ -130,8 +125,9 @@ def get_data(self, index):
else:
mask = depth_mask
cloud_masked = cloud[mask]
- # seg_masked = seg[mask]
+ if return_raw_cloud:
+ return cloud_masked
# sample points random
if len(cloud_masked) >= self.num_points:
idxs = np.random.choice(len(cloud_masked), self.num_points, replace=False)
@@ -139,9 +135,6 @@ def get_data(self, index):
idxs1 = np.arange(len(cloud_masked))
idxs2 = np.random.choice(len(cloud_masked), self.num_points - len(cloud_masked), replace=True)
idxs = np.concatenate([idxs1, idxs2], axis=0)
-
- # idxs_id = int(self.depthpath[index].split('scene_')[-1][:4]) * 256 + int(self.depthpath[index][-8:-4])
- # idxs = np.load('/data/cloud_inds_{}/{}.npy'.format(self.camera, idxs_id))[0] # for bot
cloud_sampled = cloud_masked[idxs]
ret_dict = {'point_clouds': cloud_sampled.astype(np.float32),
@@ -151,7 +144,6 @@ def get_data(self, index):
return ret_dict
def get_data_label(self, index):
- # color = np.array(Image.open(self.colorpath[index]), dtype=np.float32) / 255.0
depth = np.array(Image.open(self.depthpath[index]))
seg = np.array(Image.open(self.labelpath[index]))
meta = scio.loadmat(self.metapath[index])
@@ -182,7 +174,6 @@ def get_data_label(self, index):
else:
mask = depth_mask
cloud_masked = cloud[mask]
- # color_masked = color[mask]
seg_masked = seg[mask]
# sample points
@@ -193,7 +184,6 @@ def get_data_label(self, index):
idxs2 = np.random.choice(len(cloud_masked), self.num_points - len(cloud_masked), replace=True)
idxs = np.concatenate([idxs1, idxs2], axis=0)
cloud_sampled = cloud_masked[idxs]
- # color_sampled = color_masked[idxs]
seg_sampled = seg_masked[idxs]
graspness_sampled = graspness[idxs]
objectness_label = seg_sampled.copy()
@@ -211,15 +201,6 @@ def get_data_label(self, index):
points, widths, scores = self.grasp_labels[obj_idx]
collision = self.collision_labels[scene][i] # (Np, V, A, D)
- # remove invisible grasp points
- if self.remove_invisible:
- visible_mask = remove_invisible_grasp_points(cloud_sampled[seg_sampled == obj_idx], points,
- poses[:, :, i], th=0.01)
- points = points[visible_mask]
- widths = widths[visible_mask]
- scores = scores[visible_mask]
- collision = collision[visible_mask]
-
idxs = np.random.choice(len(points), min(max(int(len(points) / 4), 300), len(points)), replace=False)
grasp_points_list.append(points[idxs])
grasp_widths_list.append(widths[idxs])
diff --git a/dataset/simplify_dataset.py b/dataset/simplify_dataset.py
index 2281acb..0a7efd5 100644
--- a/dataset/simplify_dataset.py
+++ b/dataset/simplify_dataset.py
@@ -1,7 +1,10 @@
-from tqdm import tqdm
import numpy as np
import os
-import scipy.io as scio
+import argparse
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--dataset_root', default=None, required=True)
def simplify_grasp_labels(root, save_path):
@@ -33,7 +36,8 @@ def simplify_grasp_labels(root, save_path):
if __name__ == '__main__':
- root = '/media/bot/980A6F5E0A6F38801/datasets/graspnet/'
+ cfgs = parser.parse_args()
+ root = cfgs.dataset_root # set root and save path
save_path = os.path.join(root, 'grasp_label_simplified')
simplify_grasp_labels(root, save_path)
diff --git a/demo.py b/demo.py
deleted file mode 100644
index f257bcb..0000000
--- a/demo.py
+++ /dev/null
@@ -1,124 +0,0 @@
-""" Demo to show prediction results.
- Author: chenxi-wang
-"""
-
-import os
-import sys
-import numpy as np
-import open3d as o3d
-import argparse
-import importlib
-import scipy.io as scio
-from PIL import Image
-
-import torch
-from graspnetAPI import GraspGroup
-
-ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
-sys.path.append(os.path.join(ROOT_DIR, 'models'))
-sys.path.append(os.path.join(ROOT_DIR, 'dataset'))
-sys.path.append(os.path.join(ROOT_DIR, 'utils'))
-
-from graspnet import GraspNet, pred_decode
-from graspnet_dataset import GraspNetDataset
-from collision_detector import ModelFreeCollisionDetector
-from data_utils import CameraInfo, create_point_cloud_from_depth_image
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--checkpoint_path', required=True, help='Model checkpoint path')
-parser.add_argument('--num_point', type=int, default=20000, help='Point Number [default: 20000]')
-parser.add_argument('--num_view', type=int, default=300, help='View Number [default: 300]')
-parser.add_argument('--collision_thresh', type=float, default=0.01, help='Collision Threshold in collision detection [default: 0.01]')
-parser.add_argument('--voxel_size', type=float, default=0.01, help='Voxel Size to process point clouds before collision detection [default: 0.01]')
-cfgs = parser.parse_args()
-
-
-def get_net():
- # Init the model
- net = GraspNet(input_feature_dim=0, num_view=cfgs.num_view, num_angle=12, num_depth=4,
- cylinder_radius=0.05, hmin=-0.02, hmax_list=[0.01,0.02,0.03,0.04], is_training=False)
- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
- net.to(device)
- # Load checkpoint
- checkpoint = torch.load(cfgs.checkpoint_path)
- net.load_state_dict(checkpoint['model_state_dict'])
- start_epoch = checkpoint['epoch']
- print("-> loaded checkpoint %s (epoch: %d)"%(cfgs.checkpoint_path, start_epoch))
- # set model to eval mode
- net.eval()
- return net
-
-def get_and_process_data(data_dir):
- # load data
- color = np.array(Image.open(os.path.join(data_dir, 'color.png')), dtype=np.float32) / 255.0
- depth = np.array(Image.open(os.path.join(data_dir, 'depth.png')))
- workspace_mask = np.array(Image.open(os.path.join(data_dir, 'workspace_mask.png')))
- meta = scio.loadmat(os.path.join(data_dir, 'meta.mat'))
- intrinsic = meta['intrinsic_matrix']
- factor_depth = meta['factor_depth']
-
- # generate cloud
- camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2], factor_depth)
- cloud = create_point_cloud_from_depth_image(depth, camera, organized=True)
-
- # get valid points
- mask = (workspace_mask & (depth > 0))
- cloud_masked = cloud[mask]
- color_masked = color[mask]
-
- # sample points
- if len(cloud_masked) >= cfgs.num_point:
- idxs = np.random.choice(len(cloud_masked), cfgs.num_point, replace=False)
- else:
- idxs1 = np.arange(len(cloud_masked))
- idxs2 = np.random.choice(len(cloud_masked), cfgs.num_point-len(cloud_masked), replace=True)
- idxs = np.concatenate([idxs1, idxs2], axis=0)
- cloud_sampled = cloud_masked[idxs]
- color_sampled = color_masked[idxs]
-
- # convert data
- cloud = o3d.geometry.PointCloud()
- cloud.points = o3d.utility.Vector3dVector(cloud_masked.astype(np.float32))
- cloud.colors = o3d.utility.Vector3dVector(color_masked.astype(np.float32))
- end_points = dict()
- cloud_sampled = torch.from_numpy(cloud_sampled[np.newaxis].astype(np.float32))
- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
- cloud_sampled = cloud_sampled.to(device)
- end_points['point_clouds'] = cloud_sampled
- end_points['cloud_colors'] = color_sampled
-
- return end_points, cloud
-
-def get_grasps(net, end_points):
- # Forward pass
- with torch.no_grad():
- end_points = net(end_points)
- grasp_preds = pred_decode(end_points)
- gg_array = grasp_preds[0].detach().cpu().numpy()
- gg = GraspGroup(gg_array)
- return gg
-
-def collision_detection(gg, cloud):
- mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs.voxel_size)
- collision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=cfgs.collision_thresh)
- gg = gg[~collision_mask]
- return gg
-
-def vis_grasps(gg, cloud):
- gg.nms()
- gg.sort_by_score()
- gg = gg[:50]
- grippers = gg.to_open3d_geometry_list()
- o3d.visualization.draw_geometries([cloud, *grippers])
-
-def demo(data_dir):
- net = get_net()
- end_points, cloud = get_and_process_data(data_dir)
- gg = get_grasps(net, end_points)
- if cfgs.collision_thresh > 0:
- gg = collision_detection(gg, np.array(cloud.points))
- vis_grasps(gg, cloud)
-
-if __name__=='__main__':
- data_dir = 'doc/example_data'
- demo(data_dir)
diff --git a/models/backbone.py b/models/backbone.py
deleted file mode 100644
index 859190a..0000000
--- a/models/backbone.py
+++ /dev/null
@@ -1,131 +0,0 @@
-""" PointNet2 backbone for feature learning.
- Author: Charles R. Qi
-"""
-import os
-import sys
-import torch
-import torch.nn as nn
-
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-ROOT_DIR = os.path.dirname(BASE_DIR)
-sys.path.append(ROOT_DIR)
-
-from pointnet2.pointnet2_modules import PointnetSAModuleVotes, PointnetFPModule
-
-
-class Pointnet2Backbone(nn.Module):
- r"""
- Backbone network for point cloud feature learning.
- Based on Pointnet++ single-scale grouping network.
-
- Parameters
- ----------
- input_feature_dim: int
- Number of input channels in the feature descriptor for each point.
- e.g. 3 for RGB.
- """
- def __init__(self, input_feature_dim=0):
- super().__init__()
-
- self.sa1 = PointnetSAModuleVotes(
- npoint=2048,
- radius=0.04,
- nsample=64,
- mlp=[input_feature_dim, 64, 64, 128],
- use_xyz=True,
- normalize_xyz=True
- )
-
- self.sa2 = PointnetSAModuleVotes(
- npoint=1024,
- radius=0.1,
- nsample=32,
- mlp=[128, 128, 128, 256],
- use_xyz=True,
- normalize_xyz=True
- )
-
- self.sa3 = PointnetSAModuleVotes(
- npoint=512,
- radius=0.2,
- nsample=16,
- mlp=[256, 128, 128, 256],
- use_xyz=True,
- normalize_xyz=True
- )
-
- self.sa4 = PointnetSAModuleVotes(
- npoint=256,
- radius=0.3,
- nsample=16,
- mlp=[256, 128, 128, 256],
- use_xyz=True,
- normalize_xyz=True
- )
-
- self.fp1 = PointnetFPModule(mlp=[256+256,256,256])
- self.fp2 = PointnetFPModule(mlp=[256+256,256,256])
-
- def _break_up_pc(self, pc):
- xyz = pc[..., 0:3].contiguous()
- features = (
- pc[..., 3:].transpose(1, 2).contiguous()
- if pc.size(-1) > 3 else None
- )
-
- return xyz, features
-
- def forward(self, pointcloud: torch.cuda.FloatTensor, end_points=None):
- r"""
- Forward pass of the network
-
- Parameters
- ----------
- pointcloud: Variable(torch.cuda.FloatTensor)
- (B, N, 3 + input_feature_dim) tensor
- Point cloud to run predicts on
- Each point in the point-cloud MUST
- be formated as (x, y, z, features...)
-
- Returns
- ----------
- end_points: {XXX_xyz, XXX_features, XXX_inds}
- XXX_xyz: float32 Tensor of shape (B,K,3)
- XXX_features: float32 Tensor of shape (B,D,K)
- XXX_inds: int64 Tensor of shape (B,K) values in [0,N-1]
- """
- if not end_points: end_points = {}
- batch_size = pointcloud.shape[0]
-
- xyz, features = self._break_up_pc(pointcloud)
- end_points['input_xyz'] = xyz
- end_points['input_features'] = features
-
- # --------- 4 SET ABSTRACTION LAYERS ---------
- xyz, features, fps_inds = self.sa1(xyz, features)
- end_points['sa1_inds'] = fps_inds
- end_points['sa1_xyz'] = xyz
- end_points['sa1_features'] = features
-
- xyz, features, fps_inds = self.sa2(xyz, features) # this fps_inds is just 0,1,...,1023
- end_points['sa2_inds'] = fps_inds
- end_points['sa2_xyz'] = xyz
- end_points['sa2_features'] = features
-
- xyz, features, fps_inds = self.sa3(xyz, features) # this fps_inds is just 0,1,...,511
- end_points['sa3_xyz'] = xyz
- end_points['sa3_features'] = features
-
- xyz, features, fps_inds = self.sa4(xyz, features) # this fps_inds is just 0,1,...,255
- end_points['sa4_xyz'] = xyz
- end_points['sa4_features'] = features
-
- # --------- 2 FEATURE UPSAMPLING LAYERS --------
- features = self.fp1(end_points['sa3_xyz'], end_points['sa4_xyz'], end_points['sa3_features'], end_points['sa4_features'])
- features = self.fp2(end_points['sa2_xyz'], end_points['sa3_xyz'], end_points['sa2_features'], features)
- end_points['fp2_features'] = features
- end_points['fp2_xyz'] = end_points['sa2_xyz']
- num_seed = end_points['fp2_xyz'].shape[1]
- end_points['fp2_inds'] = end_points['sa1_inds'][:,0:num_seed] # indices among the entire input point clouds
-
- return features, end_points['fp2_xyz'], end_points
\ No newline at end of file
diff --git a/models/graspnet.py b/models/graspnet.py
index 20961bc..a3a20a5 100644
--- a/models/graspnet.py
+++ b/models/graspnet.py
@@ -13,7 +13,7 @@
ROOT_DIR = os.path.dirname(BASE_DIR)
sys.path.append(ROOT_DIR)
-from models.backbone_resunet14 import MinkUNet14, MinkUNet14D, MinkUNet14C
+from models.backbone_resunet14 import MinkUNet14D
from models.modules import ApproachNet, GraspableNet, CloudCrop, SWADNet
from loss_utils import GRASP_MAX_WIDTH, NUM_VIEW, NUM_ANGLE, NUM_DEPTH, GRASPNESS_THRESHOLD, M_POINT
from label_generation import process_grasp_labels, match_grasp_view_and_label, batch_viewpoint_params_to_matrix
@@ -24,7 +24,6 @@ class GraspNet(nn.Module):
def __init__(self, cylinder_radius=0.05, seed_feat_dim=512, is_training=True):
super().__init__()
self.is_training = is_training
- # self.log_string = log_string
self.seed_feature_dim = seed_feat_dim
self.num_depth = NUM_DEPTH
self.num_angle = NUM_ANGLE
@@ -58,26 +57,10 @@ def forward(self, end_points):
seed_features_graspable = []
seed_xyz_graspable = []
- # seed_inds_graspable = []
graspable_num_batch = 0.
for i in range(B):
cur_mask = graspable_mask[i]
- # inds = torch.arange(point_num).to(objectness_score.device)
- # graspable_num = cur_mask.sum()
- # graspable_num_batch += graspable_num
graspable_num_batch += cur_mask.sum()
- # if graspable_num < 200:
- # if self.log_string is None:
- # print('Warning!!! Two few graspable points! only {}'.format(graspable_num))
- # else:
- # self.log_string('Warning!!! Two few graspable points! only {}'.format(graspable_num))
- # cur_mask_danger = cur_mask.detach().clone()
- # cur_mask_danger[:800] = True
- # graspable_num = cur_mask_danger.sum()
- # cur_feat = seed_features_flipped[i][cur_mask_danger]
- # cur_seed_xyz = seed_xyz[i][cur_mask_danger]
- # # cur_inds = inds[cur_mask_danger]
- # else:
cur_feat = seed_features_flipped[i][cur_mask] # Ns*feat_dim
cur_seed_xyz = seed_xyz[i][cur_mask] # Ns*3
@@ -89,28 +72,15 @@ def forward(self, end_points):
cur_feat_flipped = cur_feat.unsqueeze(0).transpose(1, 2).contiguous() # 1*feat_dim*Ns
cur_feat = gather_operation(cur_feat_flipped, fps_idxs).squeeze(0).contiguous() # feat_dim*Ns
- # random sample M_points
- # if graspable_num >= self.M_points:
- # idxs = torch.multinomial(torch.ones(graspable_num), self.M_points, replacement=False)
- # else:
- # idxs1 = torch.arange(graspable_num)
- # idxs2 = torch.multinomial(torch.ones(graspable_num), self.M_points - graspable_num, replacement=True)
- # idxs = torch.cat([idxs1, idxs2])
- # cur_feat = cur_feat[idxs]
- # cur_seed_xyz = cur_seed_xyz[idxs]
-
seed_features_graspable.append(cur_feat)
seed_xyz_graspable.append(cur_seed_xyz)
seed_xyz_graspable = torch.stack(seed_xyz_graspable, 0) # B*Ns*3
seed_features_graspable = torch.stack(seed_features_graspable) # B*feat_dim*Ns
- # seed_features_graspable = seed_features_graspable.transpose(1, 2) # don't need transpose for FPS sample
end_points['xyz_graspable'] = seed_xyz_graspable
- # end_points['inds_graspable'] = seed_inds_graspable
end_points['graspable_count_stage1'] = graspable_num_batch / B
end_points, res_feat = self.rotation(seed_features_graspable, end_points)
seed_features_graspable = seed_features_graspable + res_feat # residual feat from view selection
- # end_points = self.rotation(seed_features_graspable, end_points)
if self.is_training:
end_points = process_grasp_labels(end_points)
@@ -137,7 +107,7 @@ def pred_decode(end_points):
grasp_angle = (grasp_score_inds // NUM_DEPTH) * np.pi / 12
grasp_depth = (grasp_score_inds % NUM_DEPTH + 1) * 0.01
grasp_depth = grasp_depth.view(-1, 1)
- grasp_width = 1.2 * end_points['grasp_width_pred'][i] / 10. # grasp width gt has been multiplied by 10
+ grasp_width = 1.2 * end_points['grasp_width_pred'][i] / 10.
grasp_width = grasp_width.view(M_POINT, NUM_ANGLE*NUM_DEPTH)
grasp_width = torch.gather(grasp_width, 1, grasp_score_inds.view(-1, 1))
grasp_width = torch.clamp(grasp_width, min=0., max=GRASP_MAX_WIDTH)
diff --git a/models/loss.py b/models/loss.py
index 60966b4..7f589f4 100644
--- a/models/loss.py
+++ b/models/loss.py
@@ -1,9 +1,5 @@
-""" Loss functions for training.
- Author: chenxi-wang
-"""
-
-import torch
import torch.nn as nn
+import torch
def get_loss(end_points):
@@ -24,12 +20,12 @@ def compute_objectness_loss(end_points):
loss = criterion(objectness_score, objectness_label)
end_points['loss/stage1_objectness_loss'] = loss
- # objectness_pred = torch.argmax(objectness_score, 1)
- # end_points['stage1_objectness_acc'] = (objectness_pred == objectness_label.long()).float().mean()
- # end_points['stage1_objectness_prec'] = (objectness_pred == objectness_label.long())[
- # objectness_pred == 1].float().mean()
- # end_points['stage1_objectness_recall'] = (objectness_pred == objectness_label.long())[
- # objectness_label == 1].float().mean()
+ objectness_pred = torch.argmax(objectness_score, 1)
+ end_points['stage1_objectness_acc'] = (objectness_pred == objectness_label.long()).float().mean()
+ end_points['stage1_objectness_prec'] = (objectness_pred == objectness_label.long())[
+ objectness_pred == 1].float().mean()
+ end_points['stage1_objectness_recall'] = (objectness_pred == objectness_label.long())[
+ objectness_label == 1].float().mean()
return loss, end_points
@@ -42,21 +38,14 @@ def compute_graspness_loss(end_points):
loss = loss[loss_mask]
loss = loss.mean()
- # graspness_score_c = graspness_score.detach().clone()[loss_mask]
- # graspness_label_c = graspness_label.detach().clone()[loss_mask]
- # graspness_score_c = torch.clamp(graspness_score_c, 0., 0.99)
- # graspness_label_c = torch.clamp(graspness_label_c, 0., 0.99)
- # rank_error = (torch.abs(torch.trunc(graspness_score_c * 20) - torch.trunc(graspness_label_c * 20)) / 20.).mean()
- # end_points['stage1_graspness_acc_rank_error'] = rank_error
-
- # graspness_score_c[graspness_score_c > 0.15] = 1
- # graspness_score_c[graspness_score_c <= 0.15] = 0
- # graspness_label_c[graspness_label_c > 0.15] = 1
- # graspness_label_c[graspness_label_c <= 0.15] = 0
- # end_points['graspness_acc'] = (graspness_score_c.bool() == graspness_label_c.bool()).float().mean()
+ graspness_score_c = graspness_score.detach().clone()[loss_mask]
+ graspness_label_c = graspness_label.detach().clone()[loss_mask]
+ graspness_score_c = torch.clamp(graspness_score_c, 0., 0.99)
+ graspness_label_c = torch.clamp(graspness_label_c, 0., 0.99)
+ rank_error = (torch.abs(torch.trunc(graspness_score_c * 20) - torch.trunc(graspness_label_c * 20)) / 20.).mean()
+ end_points['stage1_graspness_acc_rank_error'] = rank_error
end_points['loss/stage1_graspness_loss'] = loss
-
return loss, end_points
@@ -74,19 +63,15 @@ def compute_score_loss(end_points):
grasp_score_pred = end_points['grasp_score_pred']
grasp_score_label = end_points['batch_grasp_score']
loss = criterion(grasp_score_pred, grasp_score_label)
- # loss_mask = end_points['objectness_label'].bool()
- # loss_mask = torch.gather(loss_mask, 1, end_points['inds_graspable'])
- # loss = loss[loss_mask].mean()
+
end_points['loss/stage3_score_loss'] = loss
- # end_points['loss/stage3_score_loss_point_num'] = loss_mask.sum() / B
return loss, end_points
def compute_width_loss(end_points):
criterion = nn.SmoothL1Loss(reduction='none')
grasp_width_pred = end_points['grasp_width_pred']
- # norm by cylinder radius(Crop module) and norm to 0~1, original with is 0~0.1, /0.05->0~2, /2->0~1
- grasp_width_label = end_points['batch_grasp_width'] * 10 # norm by cylinder radius(Crop module)
+ grasp_width_label = end_points['batch_grasp_width'] * 10
loss = criterion(grasp_width_pred, grasp_width_label)
grasp_score_label = end_points['batch_grasp_score']
loss_mask = grasp_score_label > 0
diff --git a/models/modules.py b/models/modules.py
index 0747c71..3be1789 100644
--- a/models/modules.py
+++ b/models/modules.py
@@ -21,16 +21,9 @@ class GraspableNet(nn.Module):
def __init__(self, seed_feature_dim):
super().__init__()
self.in_dim = seed_feature_dim
- # self.conv1 = nn.Conv1d(self.in_dim, 128, 1)
self.conv_graspable = nn.Conv1d(self.in_dim, 3, 1)
def forward(self, seed_features, end_points):
- # features = F.relu(self.bn1(self.conv1(seed_features)), inplace=True)
- # features = F.relu(self.bn2(self.conv2(features)), inplace=True)
- # features = F.relu(self.conv1(seed_features), inplace=True)
- # features = F.relu(self.conv2(features), inplace=True)
-
- # seed_features = F.relu(self.conv1(seed_features), inplace=True)
graspable_score = self.conv_graspable(seed_features) # (B, 3, num_seed)
end_points['objectness_score'] = graspable_score[:, :2]
end_points['graspness_score'] = graspable_score[:, 2]
@@ -39,14 +32,6 @@ def forward(self, seed_features, end_points):
class ApproachNet(nn.Module):
def __init__(self, num_view, seed_feature_dim, is_training=True):
- """ Approach vector estimation from seed point features.
-
- Input:
- num_view: [int]
- number of views generated from each each seed point
- seed_feature_dim: [int]
- number of channels of seed point features
- """
super().__init__()
self.num_view = num_view
self.in_dim = seed_feature_dim
@@ -55,23 +40,12 @@ def __init__(self, num_view, seed_feature_dim, is_training=True):
self.conv2 = nn.Conv1d(self.in_dim, self.num_view, 1)
def forward(self, seed_features, end_points):
- """ Forward pass.
-
- Input:
- seed_features: [torch.FloatTensor, (batch_size,feature_dim,num_seed)
- features of seed points
- end_points: [dict]
-
- Output:
- end_points: [dict]
- """
B, _, num_seed = seed_features.size()
res_features = F.relu(self.conv1(seed_features), inplace=True)
features = self.conv2(res_features)
view_score = features.transpose(1, 2).contiguous() # (B, num_seed, num_view)
end_points['view_score'] = view_score
- # print(view_score.min(), view_score.max(), view_score.mean())
if self.is_training:
# normalize view graspness score to 0~1
view_score_ = view_score.clone().detach()
@@ -104,20 +78,6 @@ def forward(self, seed_features, end_points):
class CloudCrop(nn.Module):
- """ Cylinder group and align for grasp configure estimation. Return a list of grouped points with different cropping depths.
- Input:
- nsample: [int]
- sample number in a group
- seed_feature_dim: [int]
- number of channels of grouped points
- cylinder_radius: [float]
- radius of the cylinder space
- hmin: [float]
- height of the bottom surface
- hmax_list: [list of float]
- list of heights of the upper surface
- """
-
def __init__(self, nsample, seed_feature_dim, cylinder_radius=0.05, hmin=-0.02, hmax=0.04):
super().__init__()
self.nsample = nsample
@@ -125,25 +85,11 @@ def __init__(self, nsample, seed_feature_dim, cylinder_radius=0.05, hmin=-0.02,
self.cylinder_radius = cylinder_radius
mlps = [3 + self.in_dim, 256, 256] # use xyz, so plus 3
- # returned group features without xyz, normalize xyz as graspness paper
- # (i don't know if features with xyz will be better)
self.grouper = CylinderQueryAndGroup(radius=cylinder_radius, hmin=hmin, hmax=hmax, nsample=nsample,
use_xyz=True, normalize_xyz=True)
self.mlps = pt_utils.SharedMLP(mlps, bn=True)
def forward(self, seed_xyz_graspable, seed_features_graspable, vp_rot):
- """ Forward pass.
- Input:
- seed_xyz: [torch.FloatTensor, (batch_size,num_seed,3)]
- coordinates of seed points
- pointcloud: [torch.FloatTensor, (batch_size,num_seed,3)]
- the points to be cropped
- vp_rot: [torch.FloatTensor, (batch_size,num_seed,3,3)]
- rotation matrices generated from approach vectors
- Output:
- vp_features: [torch.FloatTensor, (batch_size,num_features,num_seed,num_depth)]
- features of grouped points in different depths
- """
grouped_feature = self.grouper(seed_xyz_graspable, seed_xyz_graspable, vp_rot,
seed_features_graspable) # B*3 + feat_dim*M*K
new_features = self.mlps(grouped_feature) # (batch_size, mlps[-1], M, K)
@@ -153,15 +99,6 @@ def forward(self, seed_xyz_graspable, seed_features_graspable, vp_rot):
class SWADNet(nn.Module):
- """ Grasp configure estimation.
-
- Input:
- num_angle: [int]
- number of in-plane rotation angle classes
- the value of the i-th class --> i*PI/num_angle (i=0,...,num_angle-1)
- num_depth: [int]
- number of gripper depth classes
- """
def __init__(self, num_angle, num_depth):
super().__init__()
self.num_angle = num_angle
@@ -171,16 +108,6 @@ def __init__(self, num_angle, num_depth):
self.conv_swad = nn.Conv1d(256, 2*num_angle*num_depth, 1)
def forward(self, vp_features, end_points):
- """ Forward pass.
-
- Input:
- vp_features: [torch.FloatTensor, (batch_size,num_seed,3)]
- features of grouped points in different depths
- end_points: [dict]
-
- Output:
- end_points: [dict]
- """
B, _, num_seed = vp_features.size()
vp_features = F.relu(self.conv1(vp_features), inplace=True)
vp_features = self.conv_swad(vp_features)
diff --git a/models/resnet.py b/models/resnet.py
index c5acb98..6e57827 100644
--- a/models/resnet.py
+++ b/models/resnet.py
@@ -1,9 +1,4 @@
-import os
-import numpy as np
-
-import torch
import torch.nn as nn
-from torch.optim import SGD
try:
import open3d as o3d
diff --git a/requirements.txt b/requirements.txt
index 1a8e892..2b9704a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ scipy
open3d>=0.8
Pillow
tqdm
+MinkowskiEngine==0.5.4
\ No newline at end of file
diff --git a/test.py b/test.py
index e16d1ec..11b7f05 100644
--- a/test.py
+++ b/test.py
@@ -5,8 +5,7 @@
import time
import torch
from torch.utils.data import DataLoader
-# from graspnetAPI.graspnet_eval import GraspGroup, GraspNetEval
-from graspnetAPI.graspnet_eval_zibo import GraspGroup, GraspNetEval # change the library code to save eval results
+from graspnetAPI.graspnet_eval import GraspGroup, GraspNetEval
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(ROOT_DIR, 'pointnet2'))
@@ -16,14 +15,12 @@
from models.graspnet import GraspNet, pred_decode
from dataset.graspnet_dataset import GraspNetDataset, minkowski_collate_fn
-# from utils.collision_detector import ModelFreeCollisionDetector
+from utils.collision_detector import ModelFreeCollisionDetector
parser = argparse.ArgumentParser()
-parser.add_argument('--dataset_root', default='/media/bot/980A6F5E0A6F38801/datasets/graspnet')
-parser.add_argument('--checkpoint_path', help='Model checkpoint path',
- default='/data/zibo/logs/log_kn_v7/np15000_dim512_graspness1e-1_M1024_bs2_lr5e-4_viewres_dataaug_fps_epoch05.tar')
-parser.add_argument('--dump_dir', help='Dump dir to save outputs',
- default='/data/zibo/logs/log_kn_v7/dump_epoch05')
+parser.add_argument('--dataset_root', default=None, required=True)
+parser.add_argument('--checkpoint_path', help='Model checkpoint path', default=None, required=True)
+parser.add_argument('--dump_dir', help='Dump dir to save outputs', default=None, required=True)
parser.add_argument('--seed_feat_dim', default=512, type=int, help='Point wise feature dim')
parser.add_argument('--camera', default='kinect', help='Camera split [realsense/kinect]')
parser.add_argument('--num_point', type=int, default=15000, help='Point Number [default: 15000]')
@@ -48,7 +45,7 @@ def my_worker_init_fn(worker_id):
def inference():
- test_dataset = GraspNetDataset(cfgs.dataset_root, split='mini_test', camera=cfgs.camera, num_points=cfgs.num_point,
+ test_dataset = GraspNetDataset(cfgs.dataset_root, split='test_seen', camera=cfgs.camera, num_points=cfgs.num_point,
voxel_size=cfgs.voxel_size, remove_outlier=True, augment=False, load_label=False)
print('Test dataset length: ', len(test_dataset))
scene_list = test_dataset.scene_list()
@@ -90,11 +87,11 @@ def inference():
gg = GraspGroup(preds)
# collision detection
- # if cfgs.collision_thresh > 0:
- # cloud, _ = test.get_data(data_idx, return_raw_cloud=True)
- # mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs.voxel_size)
- # collision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=cfgs.collision_thresh)
- # gg = gg[~collision_mask]
+ if cfgs.collision_thresh > 0:
+ cloud = test_dataset.get_data(data_idx, return_raw_cloud=True)
+ mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs.voxel_size_cd)
+ collision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=cfgs.collision_thresh)
+ gg = gg[~collision_mask]
# save grasps
save_dir = os.path.join(cfgs.dump_dir, scene_list[data_idx], cfgs.camera)
@@ -109,16 +106,15 @@ def inference():
tic = time.time()
-def evaluate(dump_dir): # changed the graspnetAPI code, it can directly save evaluating results
+def evaluate(dump_dir):
ge = GraspNetEval(root=cfgs.dataset_root, camera=cfgs.camera, split='test_seen')
- # ge.eval_seen(dump_folder=dump_dir, proc=6)
- ge.eval_scene(scene_id=100, dump_folder=dump_dir)
+ res, ap = ge.eval_seen(dump_folder=dump_dir, proc=6)
+ save_dir = os.path.join(cfgs.dump_dir, 'ap_{}.npy'.format(cfgs.camera))
+ np.save(save_dir, res)
if __name__ == '__main__':
- # python test.py --infer --eval
if cfgs.infer:
inference()
- # python test.py --dump_dir logs/dump_kn_decouple_quat/
if cfgs.eval:
evaluate(cfgs.dump_dir)
diff --git a/train.py b/train.py
index 011a124..1755c1d 100644
--- a/train.py
+++ b/train.py
@@ -20,13 +20,11 @@
from dataset.graspnet_dataset import GraspNetDataset, minkowski_collate_fn, load_grasp_labels
parser = argparse.ArgumentParser()
-parser.add_argument('--dataset_root', default='/media/bot/980A6F5E0A6F38801/datasets/graspnet')
+parser.add_argument('--dataset_root', default=None, required=True)
parser.add_argument('--camera', default='kinect', help='Camera split [realsense/kinect]')
-parser.add_argument('--checkpoint_path', help='Model checkpoint path',
- default='/data/zibo/logs/log_kn_v8/np15000_graspness1e-1_bs2_lr7e-4_viewres_dataaug_fps_14D_epoch03.tar')
-parser.add_argument('--model_name', type=str,
- default='np15000_graspness1e-1_bs2_lr7e-4_viewres_dataaug_fps_14D')
-parser.add_argument('--log_dir', default='/data/zibo/logs/log_kn_v8')
+parser.add_argument('--checkpoint_path', help='Model checkpoint path', default=None)
+parser.add_argument('--model_name', type=str, default=None)
+parser.add_argument('--log_dir', default='logs/log')
parser.add_argument('--num_point', type=int, default=15000, help='Point Number [default: 20000]')
parser.add_argument('--seed_feat_dim', default=512, type=int, help='Point wise feature dim')
parser.add_argument('--voxel_size', type=float, default=0.005, help='Voxel Size to process point clouds ')
@@ -35,7 +33,6 @@
parser.add_argument('--learning_rate', type=float, default=0.0007, help='Initial learning rate [default: 0.001]')
parser.add_argument('--resume', default=0, type=int, help='Whether to resume from checkpoint')
cfgs = parser.parse_args()
-
# ------------------------------------------------------------------------- GLOBAL CONFIG BEG
EPOCH_CNT = 0
CHECKPOINT_PATH = cfgs.checkpoint_path if cfgs.checkpoint_path is not None and cfgs.resume else None
@@ -59,10 +56,9 @@ def my_worker_init_fn(worker_id):
grasp_labels = load_grasp_labels(cfgs.dataset_root)
-# grasp_labels=None
TRAIN_DATASET = GraspNetDataset(cfgs.dataset_root, grasp_labels=grasp_labels, camera=cfgs.camera, split='train',
num_points=cfgs.num_point, voxel_size=cfgs.voxel_size,
- remove_outlier=True, augment=True, remove_invisible=False, load_label=True)
+ remove_outlier=True, augment=True, load_label=True)
print('train dataset length: ', len(TRAIN_DATASET))
TRAIN_DATALOADER = DataLoader(TRAIN_DATASET, batch_size=cfgs.batch_size, shuffle=True,
num_workers=0, worker_init_fn=my_worker_init_fn, collate_fn=minkowski_collate_fn)
diff --git a/utils/label_generation.py b/utils/label_generation.py
index 865cf45..508c638 100644
--- a/utils/label_generation.py
+++ b/utils/label_generation.py
@@ -22,7 +22,6 @@ def process_grasp_labels(end_points):
batch_size, num_samples, _ = seed_xyzs.size()
batch_grasp_points = []
- # batch_grasp_views = []
batch_grasp_views_rot = []
batch_grasp_scores = []
batch_grasp_widths = []
@@ -32,7 +31,6 @@ def process_grasp_labels(end_points):
# get merged grasp points for label computation
grasp_points_merged = []
- # grasp_views_merged = []
grasp_views_rot_merged = []
grasp_scores_merged = []
grasp_widths_merged = []
@@ -55,8 +53,6 @@ def process_grasp_labels(end_points):
grasp_views_ = grasp_views.transpose(0, 1).contiguous().unsqueeze(0)
grasp_views_trans_ = grasp_views_trans.transpose(0, 1).contiguous().unsqueeze(0)
view_inds = knn(grasp_views_trans_, grasp_views_, k=1).squeeze() - 1
- # grasp_views_trans = torch.index_select(grasp_views_trans, 0, view_inds) # (V, 3)
- # grasp_views_trans = grasp_views_trans.unsqueeze(0).expand(num_grasp_points, -1, -1) # (Np, V, 3)
grasp_views_rot_trans = torch.index_select(grasp_views_rot_trans, 0, view_inds) # (V, 3, 3)
grasp_views_rot_trans = grasp_views_rot_trans.unsqueeze(0).expand(num_grasp_points, -1, -1,
-1) # (Np, V, 3, 3)
@@ -64,13 +60,11 @@ def process_grasp_labels(end_points):
grasp_widths = torch.index_select(grasp_widths, 1, view_inds) # (Np, V, A, D)
# add to list
grasp_points_merged.append(grasp_points_trans)
- # grasp_views_merged.append(grasp_views_trans)
grasp_views_rot_merged.append(grasp_views_rot_trans)
grasp_scores_merged.append(grasp_scores)
grasp_widths_merged.append(grasp_widths)
grasp_points_merged = torch.cat(grasp_points_merged, dim=0) # (Np', 3)
- # grasp_views_merged = torch.cat(grasp_views_merged, dim=0) # (Np', V, 3)
grasp_views_rot_merged = torch.cat(grasp_views_rot_merged, dim=0) # (Np', V, 3, 3)
grasp_scores_merged = torch.cat(grasp_scores_merged, dim=0) # (Np', V, A, D)
grasp_widths_merged = torch.cat(grasp_widths_merged, dim=0) # (Np', V, A, D)
@@ -82,20 +76,17 @@ def process_grasp_labels(end_points):
# assign anchor points to real points
grasp_points_merged = torch.index_select(grasp_points_merged, 0, nn_inds) # (Ns, 3)
- # grasp_views_merged = torch.index_select(grasp_views_merged, 0, nn_inds) # (Ns, V, 3)
grasp_views_rot_merged = torch.index_select(grasp_views_rot_merged, 0, nn_inds) # (Ns, V, 3, 3)
grasp_scores_merged = torch.index_select(grasp_scores_merged, 0, nn_inds) # (Ns, V, A, D)
grasp_widths_merged = torch.index_select(grasp_widths_merged, 0, nn_inds) # (Ns, V, A, D)
# add to batch
batch_grasp_points.append(grasp_points_merged)
- # batch_grasp_views.append(grasp_views_merged)
batch_grasp_views_rot.append(grasp_views_rot_merged)
batch_grasp_scores.append(grasp_scores_merged)
batch_grasp_widths.append(grasp_widths_merged)
batch_grasp_points = torch.stack(batch_grasp_points, 0) # (B, Ns, 3)
- # batch_grasp_views = torch.stack(batch_grasp_views, 0) # (B, Ns, V, 3)
batch_grasp_views_rot = torch.stack(batch_grasp_views_rot, 0) # (B, Ns, V, 3, 3)
batch_grasp_scores = torch.stack(batch_grasp_scores, 0) # (B, Ns, V, A, D)
batch_grasp_widths = torch.stack(batch_grasp_widths, 0) # (B, Ns, V, A, D)
@@ -111,13 +102,9 @@ def process_grasp_labels(end_points):
view_graspness_max = view_graspness_max.unsqueeze(-1).expand(-1, -1, 300) # (B, Ns, V)
view_graspness_min = view_graspness_min.unsqueeze(-1).expand(-1, -1, 300) # same shape as batch_grasp_view_graspness
batch_grasp_view_graspness = (batch_grasp_view_graspness - view_graspness_min) / (view_graspness_max - view_graspness_min + 1e-5)
- # batch_grasp_view_graspness = torch.clamp(batch_grasp_view_graspness, 0., 1.)
# process scores
label_mask = (batch_grasp_scores > 0) & (batch_grasp_widths <= GRASP_MAX_WIDTH) # (B, Ns, V, A, D)
- # u_max = batch_grasp_scores.max()
- # u_min = batch_grasp_scores[label_mask].min()
- # batch_grasp_scores[label_mask] = torch.log(u_max / batch_grasp_scores[label_mask]) / torch.log(u_max / u_min)
batch_grasp_scores[~label_mask] = 0
end_points['batch_grasp_point'] = batch_grasp_points
diff --git a/vis_and_analysis.py b/vis_and_analysis.py
deleted file mode 100644
index da49baa..0000000
--- a/vis_and_analysis.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import numpy as np
-from graspnetAPI import GraspGroup
-import open3d as o3d
-import scipy.io as scio
-from PIL import Image
-import os
-from utils.data_utils import CameraInfo, create_point_cloud_from_depth_image, get_workspace_mask
-
-data_path = '/media/bot/980A6F5E0A6F38801/datasets/graspnet'
-dump_dir = '/data/zibo/logs/log_kn_v1/dump_epoch05_fps/'
-scene_id = 'scene_0101'
-ann_id = '0000'
-
-poses = np.load(os.path.join(dump_dir, scene_id, 'kinect', ann_id + '.npy'))
-
-# vis
-color = np.array(Image.open(os.path.join(data_path, 'scenes', scene_id, 'kinect', 'rgb', ann_id + '.png')), dtype=np.float32) / 255.0
-depth = np.array(Image.open(os.path.join(data_path, 'scenes', scene_id, 'kinect', 'depth', ann_id + '.png')))
-seg = np.array(Image.open(os.path.join(data_path, 'scenes', scene_id, 'kinect', 'label', ann_id + '.png')))
-meta = scio.loadmat(os.path.join(data_path, 'scenes', scene_id, 'kinect', 'meta', ann_id + '.mat'))
-intrinsic = meta['intrinsic_matrix']
-factor_depth = meta['factor_depth']
-camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2], factor_depth)
-point_cloud = create_point_cloud_from_depth_image(depth, camera, organized=True)
-depth_mask = (depth > 0)
-camera_poses = np.load(os.path.join(data_path, 'scenes', scene_id, 'kinect', 'camera_poses.npy'))
-align_mat = np.load(os.path.join(data_path, 'scenes', scene_id, 'kinect', 'cam0_wrt_table.npy'))
-trans = np.dot(align_mat, camera_poses[int(ann_id)])
-workspace_mask = get_workspace_mask(point_cloud, seg, trans=trans, organized=True, outlier=0.02)
-mask = (depth_mask & workspace_mask)
-point_cloud = point_cloud[mask]
-color = color[mask]
-
-cloud = o3d.geometry.PointCloud()
-cloud.points = o3d.utility.Vector3dVector(point_cloud.reshape(-1, 3).astype(np.float32))
-cloud.colors = o3d.utility.Vector3dVector(color.reshape(-1, 3).astype(np.float32))
-gg = GraspGroup(poses)
-gg = gg.nms()
-# print(gg.grasp_group_array.shape)
-gg = gg.sort_by_score()
-if gg.__len__() > 30:
- gg = gg[:30]
-grippers = gg.to_open3d_geometry_list()
-o3d.visualization.draw_geometries([cloud, *grippers])