-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_preprocess_tianchi_rank22.py
78 lines (69 loc) · 3.68 KB
/
data_preprocess_tianchi_rank22.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/python3
#data preprocess tianchi rank22
def normalize(image):
MIN_BOUND = -1000.0
MAX_BOUND = 400.0
image = (image - MIN_BOUND) / (MAX_BOUND - MIN_BOUND)
image[image > 1] = 1.
image[image < 0] = 0.
return image
def world_2_voxel(world_coordinates, origin, spacing):
stretched_voxel_coordinates = np.absolute(world_coordinates - origin)
voxel_coordinates = stretched_voxel_coordinates / spacing
return voxel_coordinates
def voxel_2_world(voxel_coordinates, origin, spacing):
stretched_voxel_coordinates = voxel_coordinates * spacing
world_coordinates = stretched_voxel_coordinates + origin
return world_coordinates
def make_mask(center,diam,z,width,height,spacing,origin): #只显示结节
mask = np.zeros([height,width])
diam = diam/2
v_center = np.absolute((center-origin))/spacing
v_diam = int(diam/spacing[0])
v_xmin = np.max([0,int(v_center[0]-v_diam)-1])
v_xmax = np.min([width-1,int(v_center[0]+v_diam)+1])
v_ymin = np.max([0,int(v_center[1]-v_diam)-1])
v_ymax = np.min([height-1,int(v_center[1]+v_diam)+1])
v_xrange = range(v_xmin,v_xmax+1)
v_yrange = range(v_ymin,v_ymax+1)
x_data = [x*spacing[0]+origin[0] for x in range(width)]
y_data = [x*spacing[1]+origin[1] for x in range(height)]
for v_x in v_xrange:
for v_y in v_yrange:
p_x = spacing[0]*v_x + origin[0]
p_y = spacing[1]*v_y + origin[1]
if np.linalg.norm(center-np.array([p_x,p_y,z]))<=diam:
mask[np.absolute(int((p_y-origin[1]))/spacing[1]),int((p_x-origin[0])/spacing[0])] = 1.0
return(mask)
def create_samples(df_node,img_file,pic_path):
mini_df = df_node[df_node["file"]==img_file] #get all nodules associate with file
if mini_df.shape[0]>0: # some files may not have a nodule--skipping those
# load the data once
patient_id = img_file.split('/')[-1][:-4]
itk_img = SimpleITK.ReadImage(data_path + img_file)
img_array = SimpleITK.GetArrayFromImage(itk_img) # indexes are z,y,x (notice the ordering)
num_z, height, width = img_array.shape #heightXwidth constitute the transverse plane
origin = np.array(itk_img.GetOrigin()) # x,y,z Origin in world coordinates (mm)
spacing = np.array(itk_img.GetSpacing()) # spacing of voxels in world coor. (mm)
# go through all nodes (why just the biggest?)
for node_idx, cur_row in mini_df.iterrows():
node_x = cur_row["coordX"]
node_y = cur_row["coordY"]
node_z = cur_row["coordZ"]
diam = cur_row["diameter_mm"]
center = np.array([node_x, node_y, node_z]) # nodule center
v_center = np.rint(np.absolute(center-origin)/spacing) # nodule center in voxel space (still x,y,z ordering)
for i, i_z in enumerate(np.arange(int(v_center[2])-1,
int(v_center[2])+2).clip(0, num_z-1)): # clip prevents going out of bounds in Z
img = img_array[i_z]
seg_img, overlap = helpers.get_segmented_lungs(img.copy())
img = normalize(img)
mask = make_mask(center, diam, i_z*spacing[2]+origin[2],
width, height, spacing, origin)
if img.shape[0] > 512:
print patient_id
else:
cv2.imwrite(pic_path + str(patient_id)+'_'+str(node_idx)+'_'+str(i)+ '_i.png',img*255)
cv2.imwrite(pic_path + str(patient_id)+'_'+str(node_idx)+'_'+str(i)+ '_m.png',mask*255)
cv2.imwrite(pic_path + str(patient_id)+'_'+str(node_idx)+'_'+str(i)+ '_o.png',overlap*255)
return