-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathvrt_gpu_fftmax.cu
370 lines (295 loc) · 12.8 KB
/
vrt_gpu_fftmax.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
#include <zmq.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <boost/format.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/thread/thread.hpp>
#include <cerrno>
#include <chrono>
// #include <complex>
#include <csignal>
#include <fstream>
#include <iostream>
#include <thread>
// VRT
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <vrt/vrt_read.h>
#include <vrt/vrt_string.h>
#include <vrt/vrt_types.h>
#include <vrt/vrt_util.h>
#include <complex.h>
// CUDA FFT
#include <cufft.h>
#include "vrt-tools.h"
namespace po = boost::program_options;
#define REAL 0
#define IMAG 1
#define SCALE_MAX 32768
/*
 * Element-wise magnitude kernel: writes |data[k]| into mag[k] for every
 * k in [0, size).
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per element. Threads
 * whose flat index is >= size do nothing, so the grid may overshoot size.
 * No shared memory is used.
 */
__global__ void cumag(cufftComplex *data, float *mag, int size){
    // Flat global index of this thread within the 1-D launch.
    const int element = blockIdx.x*blockDim.x + threadIdx.x;

    // Tail guard for grids that do not divide size evenly.
    if (element >= size){
        return;
    }

    mag[element] = cuCabsf(data[element]);
}
// Stop request flag. It is written from the signal handler below and polled
// by the receive loop in main(), so it has to be a volatile sig_atomic_t:
// that is the only object type a signal handler may portably write, and the
// volatile qualifier keeps the polling loop from caching the value.
static volatile std::sig_atomic_t stop_signal_called = 0;

// Signal handler: records that a stop was requested. The signal number is
// ignored; the handler only sets the flag.
void sig_int_handler(int)
{
    stop_signal_called = 1;
}
// Generic case: absolute value of a scalar sample, narrowed to float.
template <typename samp_type> inline float get_abs_val(samp_type t)
{
    const auto magnitude = std::fabs(t);
    return static_cast<float>(magnitude);
}
// 16-bit complex sample: only the real (I) component is considered; the
// imaginary part does not contribute to the result.
inline float get_abs_val(std::complex<int16_t> t)
{
    const int16_t in_phase = t.real();
    return static_cast<float>(std::fabs(in_phase));
}
// 8-bit complex sample: only the real (I) component is considered; the
// imaginary part does not contribute to the result.
inline float get_abs_val(std::complex<int8_t> t)
{
    const int8_t in_phase = t.real();
    return static_cast<float>(std::fabs(in_phase));
}
/*
 * Entry point.
 *
 * Subscribes to a VRT stream over ZeroMQ, collects the 16-bit I/Q samples of
 * the data packets into frames of `sample_rate` samples, runs a forward
 * complex FFT on each frame with cuFFT, and prints one line per frame:
 *
 *     <seconds>.<fraction>, <peak frequency>, <peak level in dB>
 *
 * The peak search is limited to [min_bin, max_bin] (derived from the
 * --min-offset / --max-offset options) and can skip the centre bin with
 * --ignore-dc.
 *
 * Returns ~0 after printing the help text, 1 when ZeroMQ, CUDA or cuFFT
 * report a failure, and 0 when the receive loop ends normally (stop signal,
 * sample/time limit reached, receive error, or a lost frame without
 * --continue).
 */
int main(int argc, char* argv[])
{
    // GPU-side resources. They are created lazily when the first context
    // packet arrives, so start from a well-defined "nothing allocated" state.
    cuFloatComplex *signal = nullptr;
    float *d_mag = nullptr;
    cufftHandle plan = 0;
    bool plan_created = false;

    uint32_t num_points = 0;
    uint32_t fft_len = 1;
    int32_t min_bin = 0, max_bin = 0;

    // variables to be set by po
    std::string file, type, zmq_address;
    uint16_t instance, main_port, port;
    uint32_t channel;
    int hwm;
    size_t num_requested_samples;
    double total_time, min_offset, max_offset;

    // setup the program options
    po::options_description desc("Allowed options");
    // clang-format off
    desc.add_options()
        ("help", "help message")
        // ("file", po::value<std::string>(&file)->default_value("usrp_samples.dat"), "name of the file to write binary samples to")
        // ("type", po::value<std::string>(&type)->default_value("short"), "sample type: double, float, or short")
        ("nsamps", po::value<size_t>(&num_requested_samples)->default_value(0), "total number of samples to receive")
        ("duration", po::value<double>(&total_time)->default_value(0), "total number of seconds to receive")
        ("min-offset", po::value<double>(&min_offset), "min. freq. offset to track")
        ("max-offset", po::value<double>(&max_offset), "max. freq. offset to track")
        ("fft-duration", po::value<uint32_t>(&fft_len), "number of seconds to integrate")
        ("channel", po::value<uint32_t>(&channel)->default_value(0), "VRT channel")
        ("progress", "periodically display short-term bandwidth")
        // ("stats", "show average bandwidth on exit")
        ("int-second", "align start of reception to integer second")
        ("null", "run without writing to file")
        ("continue", "don't abort on a bad packet")
        ("ignore-dc", "Ignore 10 perc. of bins around DC")
        ("address", po::value<std::string>(&zmq_address)->default_value("localhost"), "VRT ZMQ address")
        ("zmq-split", "create a ZeroMQ stream per VRT channel, increasing port number for additional streams")
        ("instance", po::value<uint16_t>(&instance)->default_value(0), "VRT ZMQ instance")
        ("port", po::value<uint16_t>(&port), "VRT ZMQ port")
        ("hwm", po::value<int>(&hwm)->default_value(10000), "VRT ZMQ HWM")
    ;
    // clang-format on
    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, desc), vm);
    po::notify(vm);

    // print the help message
    if (vm.count("help")) {
        std::cout << boost::format("VRT samples to fftmax %s") % desc << std::endl;
        std::cout << std::endl
                  << "This application streams data from a VRT stream "
                     "to fftmax.\n"
                  << std::endl;
        return ~0;
    }

    bool progress = vm.count("progress") > 0;
    bool continue_on_bad_packet = vm.count("continue") > 0;
    bool int_second = (bool)vm.count("int-second");
    // NOTE(review): the option text promises "10 perc. of bins around DC" but
    // the peak search below only skips the single centre bin - confirm intent.
    bool ignore_dc = (bool)vm.count("ignore-dc");
    bool zmq_split = vm.count("zmq-split") > 0;

    // Install the handler so that Ctrl+C ends the receive loop via
    // stop_signal_called and the clean-up code at the end gets to run.
    std::signal(SIGINT, &sig_int_handler);

    context_type vrt_context;
    init_context(&vrt_context);

    packet_type vrt_packet;

    if (vm.count("port") > 0) {
        main_port = port;
    } else {
        main_port = DEFAULT_MAIN_PORT + MAX_CHANNELS*instance;
    }

    if (zmq_split) {
        main_port += channel;
        vrt_packet.channel_filt = 1;
    } else {
        vrt_packet.channel_filt = 1<<channel;
    }

    // CUDA
    // presumably forces early initialisation of the CUDA runtime - TODO confirm
    cudaFree(0);

    // ZMQ
    void *context = zmq_ctx_new();
    void *subscriber = (context != nullptr) ? zmq_socket(context, ZMQ_SUB) : nullptr;
    if (subscriber == nullptr) {
        fprintf(stderr, "ZMQ error: could not create subscriber socket: %s\n",
                zmq_strerror(zmq_errno()));
        if (context != nullptr)
            zmq_ctx_destroy(context);
        return 1;
    }

    // Releases everything acquired so far; used by every exit path below.
    auto release_resources = [&]() {
        if (plan_created) {
            cufftDestroy(plan);
            plan_created = false;
        }
        cudaFree(signal);   // cudaFree(nullptr) is a no-op
        signal = nullptr;
        cudaFree(d_mag);
        d_mag = nullptr;
        zmq_close(subscriber);
        zmq_ctx_destroy(context);
    };

    int rc = zmq_setsockopt (subscriber, ZMQ_RCVHWM, &hwm, sizeof hwm);
    if (rc != 0) {
        // Not fatal: the socket keeps its default high-water mark.
        fprintf(stderr, "ZMQ warning: could not set RCVHWM: %s\n",
                zmq_strerror(zmq_errno()));
    }
    std::string connect_string = "tcp://" + zmq_address + ":" + std::to_string(main_port);
    rc = zmq_connect(subscriber, connect_string.c_str());
    if (rc != 0) {
        // Explicit check instead of assert(): asserts vanish with NDEBUG.
        fprintf(stderr, "ZMQ error: could not connect to %s: %s\n",
                connect_string.c_str(), zmq_strerror(zmq_errno()));
        release_resources();
        return 1;
    }
    if (zmq_setsockopt(subscriber, ZMQ_SUBSCRIBE, "", 0) != 0) {
        fprintf(stderr, "ZMQ error: could not subscribe: %s\n",
                zmq_strerror(zmq_errno()));
        release_resources();
        return 1;
    }

    // time keeping
    auto start_time = std::chrono::steady_clock::now();
    auto stop_time = start_time + std::chrono::milliseconds(int64_t(1000 * total_time));

    uint32_t buffer[ZMQ_BUFFER_SIZE];

    unsigned long long num_total_samps = 0;

    // Track time and samps between updating the BW summary
    auto last_update = start_time;
    unsigned long long last_update_samps = 0;

    bool first_frame = true;
    bool start_rx = false;
    uint64_t last_fractional_seconds_timestamp = 0;

    // Write position inside the current FFT frame.
    uint32_t signal_pointer = 0;

    // One fractional-second tick is 1e-12 s (the timestamps are scaled by
    // 1e12 everywhere below), so this many ticks make up one second.
    const uint64_t ticks_per_second = 1000000000000ULL;

    while (not stop_signal_called
           and (num_requested_samples > num_total_samps or num_requested_samples == 0)
           and (total_time == 0.0 or std::chrono::steady_clock::now() <= stop_time)) {

        const int len = zmq_recv(subscriber, buffer, ZMQ_BUFFER_SIZE, 0);
        if (len < 0) {
            // Interrupted by a signal: go back to the loop condition, which
            // re-checks stop_signal_called. Anything else is treated as fatal
            // for the stream; never parse a buffer that was not refreshed.
            if (zmq_errno() == EINTR)
                continue;
            fprintf(stderr, "ZMQ error: receive failed: %s\n", zmq_strerror(zmq_errno()));
            break;
        }

        const auto now = std::chrono::steady_clock::now();

        if (not vrt_process(buffer, sizeof(buffer), &vrt_context, &vrt_packet)) {
            printf("Not a Vita49 packet?\n");
            continue;
        }

        // First context packet: size the FFT from the stream's sample rate
        // and allocate the GPU resources.
        if (not start_rx and vrt_packet.context) {
            vrt_print_context(&vrt_context);
            start_rx = true;
            num_points = vrt_context.sample_rate;

            // Default search window is the whole spectrum; the offsets are
            // relative to the centre bin and clamped to [0, num_points].
            min_bin = 0;
            max_bin = num_points;
            if (vm.count("min-offset")) {
                min_bin = min_offset+num_points/2;
                min_bin = min_bin < 0 ? 0 : min_bin;
                min_bin = (uint32_t)min_bin > num_points ? num_points : min_bin;
            }
            if (vm.count("max-offset")) {
                max_bin = max_offset+num_points/2;
                max_bin = max_bin < 0 ? 0 : max_bin;
                max_bin = (uint32_t)max_bin > num_points ? num_points : max_bin;
            }

            // FFT
            if (cufftPlan1d(&plan, num_points, CUFFT_C2C, 1) != CUFFT_SUCCESS) {
                fprintf(stderr, "CUFFT error: Plan creation failed");
                release_resources();
                return 1;
            }
            plan_created = true;

            if (cudaMallocManaged((void**) &signal, sizeof(cufftComplex)*num_points ) != cudaSuccess
                or cudaMallocManaged((void**) &d_mag, sizeof(float)*num_points ) != cudaSuccess) {
                fprintf(stderr, "CUDA error: managed allocation failed: %s\n",
                        cudaGetErrorString(cudaGetLastError()));
                release_resources();
                return 1;
            }
            // Hand the sample buffer to the host while it is being filled.
            cudaStreamAttachMemAsync(NULL, signal, 0, cudaMemAttachHost);
            cudaStreamSynchronize(NULL);
        }

        if (start_rx and vrt_packet.data) {

            if (vrt_packet.lost_frame)
                if (not continue_on_bad_packet)
                    break;

            if (int_second) {
                // check if fractional second has wrapped
                if (vrt_packet.fractional_seconds_timestamp > last_fractional_seconds_timestamp ) {
                    last_fractional_seconds_timestamp = vrt_packet.fractional_seconds_timestamp;
                    continue;
                } else {
                    // Wrapped: reception starts here, so restart the clocks.
                    int_second = false;
                    last_update = now;
                    start_time = now;
                    stop_time = start_time + std::chrono::milliseconds(int64_t(1000 * total_time));
                }
            }

            for (uint32_t i = 0; i < vrt_packet.num_rx_samps; i++) {
                // Each 32-bit word holds one sample: two 16-bit halves, the
                // first copied to re and the second to img.
                int16_t re;
                memcpy(&re, (char*)&buffer[vrt_packet.offset+i], 2);
                int16_t img;
                memcpy(&img, (char*)&buffer[vrt_packet.offset+i]+2, 2);

                // Alternate the sign of successive samples ((-1)^n), which
                // moves DC to the centre bin of the FFT output. The sign is
                // derived from the position inside the FFT frame rather than
                // the position inside the packet, so the pattern stays
                // continuous across packet boundaries even when a packet
                // carries an odd number of samples.
                const int mult = (signal_pointer & 1u) ? -1 : 1;
                signal[signal_pointer].x = mult*re;
                signal[signal_pointer].y = mult*img;
                signal_pointer++;

                if (signal_pointer >= num_points) {
                    // Frame complete: transform it and look for the peak.
                    signal_pointer = 0;

                    cudaStreamAttachMemAsync(NULL, signal, 0, cudaMemAttachGlobal);
                    cudaStreamAttachMemAsync(NULL, d_mag, 0, cudaMemAttachGlobal);
                    cudaStreamSynchronize(NULL);

                    if (cufftExecC2C(plan, signal, signal, CUFFT_FORWARD) != CUFFT_SUCCESS){
                        fprintf(stderr, "CUFFT error: ExecC2C Forward failed");
                        release_resources();
                        return 1;
                    }

                    cumag<<<(num_points+256-1)/256, 256>>>(signal, d_mag, num_points);
                    // The sync both waits for the results and reports any
                    // failure of the FFT/magnitude work queued above.
                    const cudaError_t sync_status = cudaDeviceSynchronize();
                    if (sync_status != cudaSuccess) {
                        fprintf(stderr, "CUDA error: magnitude computation failed: %s\n",
                                cudaGetErrorString(sync_status));
                        release_resources();
                        return 1;
                    }

                    cudaStreamAttachMemAsync(NULL, signal, 0, cudaMemAttachHost);
                    cudaStreamAttachMemAsync(NULL, d_mag, 0, cudaMemAttachHost);
                    cudaStreamSynchronize(NULL);

                    // Host-side peak search inside the configured window.
                    // NOTE(review): max_i stays -1 when no bin in the window
                    // has a magnitude above zero; the line printed below is
                    // then meaningless.
                    double max = 0;
                    int32_t max_i = -1;
                    uint32_t dc = num_points/2;

                    for (uint32_t j = 0; j < num_points; ++j) {
                        if ( (d_mag[j] > max) and (j >= (uint32_t)min_bin) and (j <= (uint32_t)max_bin) and not (ignore_dc && j==dc)) {
                            max = d_mag[j];
                            max_i = j;
                        }
                    }

                    // Timestamp of the sample that completed the frame:
                    // packet timestamp plus i sample periods.
                    uint64_t seconds = vrt_packet.integer_seconds_timestamp;
                    uint64_t frac_seconds = vrt_packet.fractional_seconds_timestamp;
                    frac_seconds += i*1e12/vrt_context.sample_rate;
                    // ">=": exactly one full second must roll over as well.
                    if (frac_seconds >= ticks_per_second) {
                        frac_seconds -= ticks_per_second;
                        seconds++;
                    }

                    int64_t peak_hz = vrt_context.rf_freq + max_i - vrt_context.sample_rate/2;
                    printf("%lu.%09li, %li, %.3f\n", seconds, (int64_t)(frac_seconds/1e3), peak_hz, 20*log10(max/(double)num_points));
                    fflush(stdout);
                }
            }

            num_total_samps += vrt_packet.num_rx_samps;

            if (start_rx and first_frame) {
                std::cout << boost::format(
                                 "# First frame: %u samples, %u full secs, %.09f frac secs")
                                 % vrt_packet.num_rx_samps
                                 % vrt_packet.integer_seconds_timestamp
                                 % ((double)vrt_packet.fractional_seconds_timestamp/1e12)
                          << std::endl;
                first_frame = false;
            }
        }

        if (progress) {
            if (vrt_packet.data)
                last_update_samps += vrt_packet.num_rx_samps;
            const auto time_since_last_update = now - last_update;
            if (time_since_last_update > std::chrono::seconds(1)) {
                const double time_since_last_update_s =
                    std::chrono::duration<double>(time_since_last_update).count();
                const double rate = double(last_update_samps) / time_since_last_update_s;
                std::cout << "\t" << (rate / 1e6) << " Msps, ";

                last_update_samps = 0;
                last_update = now;

                // Level statistics over the samples of the most recent packet.
                float sum_i = 0;
                uint32_t clip_i = 0;

                double datatype_max = 32768.;
                // if (cpu_format == "sc8" || cpu_format == "s8")
                //     datatype_max = 128.;

                for (uint32_t i=0; i<vrt_packet.num_rx_samps; i++ ) {
                    auto sample_i = get_abs_val((std::complex<int16_t>)buffer[vrt_packet.offset+i]);
                    sum_i += sample_i;
                    if (sample_i > datatype_max*0.99)
                        clip_i++;
                }
                sum_i = sum_i/vrt_packet.num_rx_samps;
                std::cout << boost::format("%.0f") % (100.0*log2(sum_i)/log2(datatype_max)) << "% I (";
                std::cout << boost::format("%.0f") % ceil(log2(sum_i)+1) << " of ";
                std::cout << (int)ceil(log2(datatype_max)+1) << " bits), ";
                std::cout << "" << boost::format("%.0f") % (100.0*clip_i/vrt_packet.num_rx_samps) << "% I clip, ";
                std::cout << std::endl;
            }
        }
    }

    // Normal shutdown: free the cuFFT plan, the managed buffers and ZeroMQ.
    release_resources();

    return 0;
}