# 23.ransac.py
import numpy as np
import scipy.linalg as sl
def ransac(data, model, n, k, t, d, debug=False, return_all=False):
"""
输入:
data - 样本点
model - 假设模型:事先自己确定
n - 生成模型所需的最少样本点
k - 最大迭代次数
t - 阈值:作为判断点满足模型的条件
d - 拟合较好时,需要的样本点最少的个数,当做阈值看待
输出:
bestfit - 最优拟合解(返回nil,如果未找到)
iterations = 0
bestfit = nil #后面更新
besterr = something really large #后期更新besterr = thiserr
while iterations < k
{
maybeinliers = 从样本中随机选取n个,不一定全是局内点,甚至全部为局外点
maybemodel = n个maybeinliers 拟合出来的可能符合要求的模型
alsoinliers = emptyset #满足误差要求的样本点,开始置空
for (每一个不是maybeinliers的样本点)
{
if 满足maybemodel即error < t
将点加入alsoinliers
}
if (alsoinliers样本点数目 > d)
{
%有了较好的模型,测试模型符合度
bettermodel = 利用所有的maybeinliers 和 alsoinliers 重新生成更好的模型
thiserr = 所有的maybeinliers 和 alsoinliers 样本点的误差度量
if thiserr < besterr
{
bestfit = bettermodel
besterr = thiserr
}
}
iterations++
}
return bestfit
"""
    iterations = 0
    bestfit = None
    besterr = np.inf  # default: worst possible error
    best_inlier_idxs = None
    while iterations < k:
        maybe_idxs, test_idxs = random_partition(n, data.shape[0])
        print('test_idxs = ', test_idxs)
        maybe_inliers = data[maybe_idxs, :]  # the n randomly chosen rows (Xi, Yi)
        test_points = data[test_idxs]  # the remaining rows (Xi, Yi)
        maybemodel = model.fit(maybe_inliers)  # fit the model to the random subset
        test_err = model.get_error(test_points, maybemodel)  # per-point squared error
        print('test_err = ', test_err < t)
        also_idxs = test_idxs[test_err < t]  # points whose error is below the threshold
        print('also_idxs = ', also_idxs)
        also_inliers = data[also_idxs, :]
        if debug:
            print('test_err.min()', test_err.min())
            print('test_err.max()', test_err.max())
            print('np.mean(test_err)', np.mean(test_err))
            print('iteration %d: len(alsoinliers) = %d' % (iterations, len(also_inliers)))
        print('d = ', d)
        if len(also_inliers) > d:
            betterdata = np.concatenate((maybe_inliers, also_inliers))  # merge the two inlier sets
            bettermodel = model.fit(betterdata)
            better_errs = model.get_error(betterdata, bettermodel)
            thiserr = np.mean(better_errs)  # use the mean error as the new error measure
            if thiserr < besterr:
                bestfit = bettermodel
                besterr = thiserr
                best_inlier_idxs = np.concatenate((maybe_idxs, also_idxs))  # update the inlier set
        iterations += 1
    if bestfit is None:
        raise ValueError("did not meet fit acceptance criteria")
    if return_all:
        return bestfit, {'inliers': best_inlier_idxs}
    else:
        return bestfit
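
# Side note (not from the original author): a common rule of thumb for choosing
# the iteration count k is k = log(1 - p) / log(1 - w**n), where p is the desired
# probability of drawing at least one all-inlier sample and w is the expected
# inlier ratio; with the demo's n = 50 and roughly 80% inliers this formula gives
# a k far larger than the 1000 used below.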
def random_partition(n, n_data):
    """Return n random row indices of the data and the remaining n_data - n indices."""
    all_idxs = np.arange(n_data)  # index array 0 .. n_data-1
    np.random.shuffle(all_idxs)  # shuffle the indices in place
    idxs1 = all_idxs[:n]
    idxs2 = all_idxs[n:]
    return idxs1, idxs2
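
# Quick illustration of random_partition (actual values depend on the RNG state,
# so the numbers below are only an example, not exact output):
#     idxs1, idxs2 = random_partition(3, 10)
#     # idxs1 -> 3 unique indices from 0..9, e.g. array([7, 2, 5])
#     # idxs2 -> the remaining 7 indices, in shuffled order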
class LinearLeastSquareModel:
    # Linear model solved with least squares; used as the input model for RANSAC.
    def __init__(self, input_columns, output_columns, debug=False):
        self.input_columns = input_columns
        self.output_columns = output_columns
        self.debug = debug

    def fit(self, data):
        # np.vstack stacks the arrays vertically (row-wise) into a new array
        A = np.vstack([data[:, i] for i in self.input_columns]).T  # first column Xi as rows
        B = np.vstack([data[:, i] for i in self.output_columns]).T  # second column Yi as rows
        x, resids, rank, s = sl.lstsq(A, B)  # resids: sum of squared residuals
        return x  # least-squares solution

    def get_error(self, data, model):
        A = np.vstack([data[:, i] for i in self.input_columns]).T  # first column Xi as rows
        B = np.vstack([data[:, i] for i in self.output_columns]).T  # second column Yi as rows
        B_fit = np.dot(A, model)  # predicted y values: B_fit = A * model
        err_per_point = np.sum((B - B_fit) ** 2, axis=1)  # sum of squared errors per row
        return err_per_point
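
# The ransac() routine above only requires the model object to expose fit(data)
# and get_error(data, params). A minimal stand-alone sketch, assuming the
# (500, 2) all_data layout built in test() below:
#     model = LinearLeastSquareModel([0], [1])
#     params = model.fit(all_data)                 # least-squares slope, shape (1, 1)
#     errors = model.get_error(all_data, params)   # squared error per row, shape (500,)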
def test():
    # generate ideal data
    n_samples = 500  # number of samples
    n_inputs = 1  # number of input variables
    n_outputs = 1  # number of output variables
    A_exact = 20 * np.random.random((n_samples, n_inputs))  # 500 random values in [0, 20), one column
    perfect_fit = 60 * np.random.normal(size=(n_inputs, n_outputs))  # random slope
    B_exact = np.dot(A_exact, perfect_fit)  # y = x * k
    # add Gaussian noise; ordinary least squares handles this well
    A_noisy = A_exact + np.random.normal(size=A_exact.shape)  # 500 x 1 column of Xi
    B_noisy = B_exact + np.random.normal(size=B_exact.shape)  # 500 x 1 column of Yi
    if 1:
        # add outliers
        n_outliers = 100
        all_idxs = np.arange(A_noisy.shape[0])  # indices 0..499
        np.random.shuffle(all_idxs)  # shuffle the indices
        outlier_idxs = all_idxs[:n_outliers]  # 100 random indices to turn into outliers
        non_outlier_idxs = all_idxs[n_outliers:]  # the remaining, non-outlier indices
        A_noisy[outlier_idxs] = 20 * np.random.random((n_outliers, n_inputs))  # outlier Xi
        B_noisy[outlier_idxs] = 50 * np.random.normal(size=(n_outliers, n_outputs))  # outlier Yi
    # set up the model
    all_data = np.hstack((A_noisy, B_noisy))  # rows of [Xi, Yi], shape (500, 2)
    input_columns = range(n_inputs)  # first column of the array (x): 0
    output_columns = [n_inputs + i for i in range(n_outputs)]  # last column of the array (y): 1
    debug = False
    model = LinearLeastSquareModel(input_columns, output_columns, debug=debug)  # least-squares model instance
    linear_fit, resids, rank, s = sl.lstsq(all_data[:, input_columns], all_data[:, output_columns])
    # run the RANSAC algorithm
    ransac_fit, ransac_data = ransac(all_data, model, 50, 1000, 7e3, 300, debug=debug, return_all=True)
    if 1:
        import pylab

        sort_idxs = np.argsort(A_exact[:, 0])
        A_col0_sorted = A_exact[sort_idxs]  # 2-D array sorted by the first column
        if 1:
            pylab.plot(A_noisy[:, 0], B_noisy[:, 0], 'k.', label='data')  # scatter plot of all points
            pylab.plot(A_noisy[ransac_data['inliers'], 0], B_noisy[ransac_data['inliers'], 0], 'bx',
                       label="RANSAC data")
        else:
            pylab.plot(A_noisy[non_outlier_idxs, 0], B_noisy[non_outlier_idxs, 0], 'k.', label='noisy data')
            pylab.plot(A_noisy[outlier_idxs, 0], B_noisy[outlier_idxs, 0], 'r.', label='outlier data')
        pylab.plot(A_col0_sorted[:, 0],
                   np.dot(A_col0_sorted, ransac_fit)[:, 0],
                   label='RANSAC fit')
        pylab.plot(A_col0_sorted[:, 0],
                   np.dot(A_col0_sorted, perfect_fit)[:, 0],
                   label='exact system')
        pylab.plot(A_col0_sorted[:, 0],
                   np.dot(A_col0_sorted, linear_fit)[:, 0],
                   label='linear fit')
        pylab.legend()
        pylab.show()


if __name__ == "__main__":
    test()