-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils2.py
445 lines (338 loc) · 12.3 KB
/
utils2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
import cv2
import numpy as np
import torch
from fastsam import FastSAMPrompt
from matplotlib import pyplot as plt
import config
# updated code
# updated code
def get_bounding_box_coordinates(mask, DEBUG = False):
"""
Calculate the coordinates of the bounding box surrounding the True region in the mask.
Parameters
----------
mask : np.array
Binary mask where True indicates the region of interest.
Returns
-------
dict
Dictionary containing the coordinates of the corners of the bounding box.
Dictionary keys: "top_left", "top_right", "bottom_right", "bottom_left".
Dictionary elements: tuple : (y, x) coordinates of the corners.
"""
DEBUG = DEBUG
# Convert boolean mask to uint8 type
mask_uint8 = mask.astype(np.uint8) * 255
# Threshold the image
_, binary = cv2.threshold(mask_uint8, 127, 255, cv2.THRESH_BINARY)
# Find contours
contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Find the largest contour
largest_contour = max(contours, key=cv2.contourArea)
# Create a copy of the original image for drawing
result = cv2.cvtColor(mask_uint8, cv2.COLOR_GRAY2BGR)
# Draw the largest contour
cv2.drawContours(result, [largest_contour], 0, (0, 0, 255), 5)
# Display the mask
if DEBUG : cv2.imshow('Binary Mask with contours', result)
# Reshape the contour to remove the extra dimension
contour_points = largest_contour.reshape(-1, 2)
# Get the top-most point (minimum y)
top_most = tuple(contour_points[contour_points[:, 1].argmin()])
bottom_most = tuple(contour_points[contour_points[:, 1].argmax()])
# Get the left-most point (minimum x)
left_most = tuple(contour_points[contour_points[:, 0].argmin()])
right_most = tuple(contour_points[contour_points[:, 0].argmax()])
top_most = top_most[1], top_most[0]
bottom_most = bottom_most[1], bottom_most[0]
left_most = left_most[1], left_most[0]
# right_most = right_most[1], right_most[0]
right_most = right_most[1], right_most[0]
# Print the coordinates
# print(f"Leftmost coordinate: {left_most}")
# print(f"Rightmost coordinate: {right_most}")
# print(f"Topmost coordinate: {top_most}")
# print(f"Bottommost coordinate: {bottom_most}")
top_left = left_most # red
top_right = top_most # blue
bottom_right = right_most # green (this is where the error is)
bottom_left = bottom_most # yellow/ cyan in opencv
# Blue and Cyan points line is the orientation of the box
orientation_angle = np.arctan2((abs(top_right[1] - bottom_left[1])), (top_right[0] - bottom_left[0])) * 180 / np.pi
# orientation_angle is tan inverse of top
if abs(top_left[1] - top_right[1]) < 10 and abs(top_left[0] - top_right[0]) < 10:
# Find the indices where the mask is True
true_indices = np.argwhere(mask)
# Get the bounding box of the True region
top_left = tuple(np.min(true_indices, axis=0))
bottom_right = tuple(np.max(true_indices, axis=0))
# Remove the channels layer coordinate
top_left = top_left[:-1]
bottom_right = bottom_right[:-1]
# Calculate the width and height of the bounding box
width = bottom_right[1] - top_left[1]
height = bottom_right[0] - top_left[0]
# make a numpy array for top_right and bottom_left
top_right = (top_left[0], top_left[1] + width)
bottom_left = (top_left[0] + height, top_left[1])
orientation_angle = np.arctan2((abs(top_left[1] - bottom_left[1])), (top_left[0] - bottom_left[0])) * 180 / np.pi
return {
"top_left": top_left,
"top_right": top_right,
"bottom_right": bottom_right,
"bottom_left": bottom_left,
"orientation": orientation_angle
}
def process_coordinates(mask, frame_h, frame_w):
# find the indices where the mask is true
true_indices = np.argwhere(mask)
y1, x1 = tuple(np.min(true_indices, axis=0))[:-1]
y4, x4 = tuple(np.max(true_indices, axis=0))[:-1]
print(y1, y4, x1, x4)
# calculate the width and height of the bounding box
width = x4 - x1
height = y4 - y1
# print("box height: ", height, "box width: ", width)
# make a numpy array for top_right and bottom_left
# y2, x2 = y1, x1 + width
# y3, x3 = y1 + height, x1
# box center
bx, by = x1 + width / 2, y1 + height / 2
ty, tx = abs((1088 / 2) - by), abs((1920 / 2) - bx)
theta = np.arctan(ty / tx)
print("translation coords: ", ty, tx, "theta: ", theta)
return {"ty": ty, "tx": tx, "theta": theta}
def get_device():
"""
Get input from the terminal
returns :
device : str : "cuda" or "cpu"
"""
device = input("Choose the device you want to use: 'cuda' or 'cpu' >>> ")
# if cuda is available use "cuda" else use "cpu"
if torch.cuda.is_available() and device == "cuda":
device = "cuda"
else:
device = "cpu"
return device
def get_model(model_name: str):
if model_name == "fastSAM":
from fastsam import FastSAM
model_fast_sam = FastSAM(config.fast_sam_checkpoint)
return model_fast_sam
elif model_name == "fastSAM-s":
from fastsam import FastSAM
model_fast_sam_s = FastSAM(config.fast_sam_s_checkpoint)
return model_fast_sam_s
elif model_name == "SAM":
pass
else:
pass
def plot_image(image):
plt.figure(figsize=(10, 10))
plt.imshow(image)
plt.axis("on")
plt.show()
def show_mask(mask, ax, random_color=False):
if random_color:
color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
else:
color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6])
h, w = mask.shape[-2:]
mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
ax.imshow(mask_image)
def show_points(coords, labels, ax, marker_size=375):
pos_points = coords[labels == 1]
neg_points = coords[labels == 0]
ax.scatter(
pos_points[:, 0],
pos_points[:, 1],
color="green",
marker="*",
s=marker_size,
edgecolor="white",
linewidth=1.25,
)
ax.scatter(
neg_points[:, 0],
neg_points[:, 1],
color="red",
marker="*",
s=marker_size,
edgecolor="white",
linewidth=1.25,
)
def show_box(box, ax):
x0, y0 = box[0], box[1]
w, h = box[2] - box[0], box[3] - box[1]
ax.add_patch(
plt.Rectangle((x0, y0), w, h, edgecolor="green", facecolor=(0, 0, 0, 0), lw=2)
)
import matplotlib.patches as patches
def plot_square(mask):
# Find the indices where the mask is True
true_indices = np.argwhere(mask)
# Get the bounding box of the True region
top_left = np.min(true_indices, axis=0)
bottom_right = np.max(true_indices, axis=0)
# Calculate the width and height of the bounding box
width = bottom_right[1] - top_left[1]
height = bottom_right[0] - top_left[0]
# Create a figure and axis
fig, ax = plt.subplots(1)
# Plot the mask
ax.imshow(mask, cmap="gray")
# Create a rectangle patch
rect = patches.Rectangle(
(top_left[1], top_left[0]),
width,
height,
linewidth=1,
edgecolor="r",
facecolor="none",
)
# Add the rectangle patch to the axis
ax.add_patch(rect)
# Show the plot
plt.show()
def annotate_square_corners(
image, top_left, top_right, bottom_left, bottom_right, save_path
):
# Create a figure and axis
fig, ax = plt.subplots(1)
# Plot the original image
ax.imshow(image)
# Annotate the corners
ax.plot(top_left[0], top_left[1], "ro") # Top Left corner
ax.plot(top_right[0], top_right[1], "go") # Top Right corner
ax.plot(bottom_left[0], bottom_left[1], "bo") # Bottom Left corner
ax.plot(bottom_right[0], bottom_right[1], "yo") # Bottom Right corner
# Save the plot to the specified path
# plt.savefig(save_path)
plt.show()
def get_box_coordinates(
img,
model,
device,
showOriginalImage=False,
showPoints=False,
showPlotMaskWithHighestScore=True,
DEBUG=False,
):
"""
Parameters
----------
img : np.array
Image frame.
model : object
Model object.
device : str
Device identifier.
showOriginalImage : bool, optional
Whether to show the original image. Default is False.
showPoints : bool, optional
Whether to show points. Default is False.
showPlotMaskWithHighestScore : bool, optional
Whether to plot the mask with the highest score. Default is True.
Returns
-------
bounding_box_coords_dict : dict
Dictionary containing the coordinates of the corners of the bounding box.
Dictionary keys: "top_left", "top_right", "bottom_right", "bottom_left".
Dictionary elements: tuple : (y, x) coordinates of the corners.
"""
# plot original image
if showOriginalImage:
plot_image(img)
# get image dimensions
img_height, img_width, _ = img.shape
# get centre point coordinates
center_point_coords = [int(img_width / 2), int(img_height / 2)]
input_point = np.array([center_point_coords])
input_label = np.array([1])
if showPoints:
plt.figure(figsize=(10, 10))
plt.imshow(img)
show_points(input_point, input_label, plt.gca())
plt.axis("on")
plt.show()
# generate the mask in the relevant area
fast_sam_predictor = model(
img, device=device, retina_masks=True, imgsz=img_width, conf=0.4, iou=0.9
)
fast_sam_prompt_process = FastSAMPrompt(img, fast_sam_predictor, device=device)
# point prompt
# points default [[0,0]] [[x1,y1],[x2,y2]]
# point_label default [0] [1,0] 0:background, 1:foreground
img_mask = fast_sam_prompt_process.point_prompt(
points=input_point, pointlabel=input_label
)
# plot_mask_with_score(img, "FastSAM output", img_mask, input_point, input_label )
# reshape image mask
# print(img_mask.shape)
img_mask = np.transpose(img_mask, (1, 2, 0))
if showPlotMaskWithHighestScore:
plot_square(img_mask)
# get the rectangular boxes
# if DEBUG: cv2.imshow("mask", img_mask.astype(np.int8))
bounding_box_coords_dict = get_bounding_box_coordinates(img_mask, DEBUG)
armctrl_dict = process_coordinates(img_mask, img_height, img_width)
armctrl_dict["orientation"] = 180 - bounding_box_coords_dict["orientation"]
del bounding_box_coords_dict["orientation"]
# get the coordinates of the rectangular bounding box
return bounding_box_coords_dict, armctrl_dict
def get_image_with_box_corners(frame, points_dict):
"""
parameters
----------
frame : np.array : image frame
points_dict : dict : dictionary containing the coordinates of the corners of the bounding box
dictionary keys : "top_left", "top_right", "bottom_right", "bottom_left"
dictionary elements : tuple : (y, x) coordinates of the corners
returns
-------
frame : np.array : image frame with the corners of the bounding box annotated
"""
circle_radius = 5
# Define colors for each point in RGB format (not BGR format)
colors_dict = {
"blue": (0, 0, 255),
"red": (255, 0, 0),
"green": (0, 255, 0),
"yellow": (255, 255, 0),
}
# Draw points on the original image
#### Point should be in (x, y) format
# top_left : red
cv2.circle(
frame,
(points_dict["top_left"][1], points_dict["top_left"][0]),
circle_radius,
colors_dict["red"],
-1,
)
# top_right : blue
cv2.circle(
frame,
(points_dict["top_right"][1], points_dict["top_right"][0]),
circle_radius,
colors_dict["blue"],
-1,
)
# bottom_right : green
cv2.circle(
frame,
(points_dict["bottom_right"][1], points_dict["bottom_left"][0]),
circle_radius,
colors_dict["green"],
-1,
)
# bottom_left : yellow
cv2.circle(
frame,
(points_dict["bottom_left"][1], points_dict["bottom_left"][0]),
circle_radius,
colors_dict["yellow"],
-1,
)
return frame