forked from tmbdev/clstm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextras.cc
562 lines (497 loc) · 16 KB
/
extras.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
// -*- C++ -*-
// Copyright 2006-2007 Deutsches Forschungszentrum fuer Kuenstliche Intelligenz
// or its licensors, as applicable.
// Copyright 1995-2005 Thomas M. Breuel
//
// You may not use this file except under the terms of the accompanying license.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you
// may not use this file except in compliance with the License. You may
// obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// FIXME: speed up convolutions, maybe using Eigen built-in
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
extern "C" {
#include <assert.h>
#include <math.h>
#include <unistd.h>
#define __sigsetjmp __sigsetjump0
#ifdef GOOGLE
#include "third_party/png/png.h"
#else
#include <png.h>
#endif
#undef __sigsetjmp
}
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "extras.h"
#include "pytensor.h"
#include "utils.h"
namespace ocropus {
using namespace std;
/// Build a TensorMap over the storage of an existing tensor.
///
/// The map is constructed from `t.data()` and `t.dimensions()`, so it refers
/// to the elements of `t` rather than to a copy of them.
template <class T, int N>
inline TensorMap<Tensor<T, N>> TM(Tensor<T, N> &t) {
  using View = TensorMap<Tensor<T, N>>;
  return View(t.data(), t.dimensions());
}
/// Perform 1D Gaussian convolutions using a FIR filter.
///
/// The mask is computed to 3 sigma and normalized so that its taps sum to
/// one. Samples that would fall outside `in` are replaced by the nearest edge
/// element.
///
/// @param out   receives the filtered signal; resized to the length of `in`.
///              Use a different tensor than `in`: results are written while
///              the input is still being read.
/// @param in    signal to filter.
/// @param sigma standard deviation of the Gaussian, in samples. A value that
///              is not strictly positive means "no smoothing" and `in` is
///              copied to `out` unchanged.
template <class T>
void gauss1d(Tensor<T, 1> &out, Tensor<T, 1> &in, float sigma) {
  out.resize(in.dimension(0));
  // sigma == 0 would turn the centre tap into exp(0/0) = NaN and poison the
  // whole mask; negative or NaN values can yield an invalid mask size.
  if (!(sigma > 0)) {
    out = in;
    return;
  }
  // make a normalized mask
  const int range = 1 + int(3.0 * sigma);
  Tensor<float, 1> mask(2 * range + 1);
  for (int i = 0; i <= range; i++) {
    double y = exp(-i * i / 2.0 / sigma / sigma);
    mask(range + i) = mask(range - i) = y;
  }
  const int m = mask.dimension(0);
  float mask_sum = 0.0;
  for (int i = 0; i < m; i++) mask_sum += mask(i);
  for (int i = 0; i < m; i++) mask(i) /= mask_sum;
  const T *in_ = in.data();
  const float *mask_ = mask.data();
  // apply it
  const int n = in.size();
  for (int i = 0; i < n; i++) {
    double acc = 0.0;
    for (int j = 0; j < m; j++) {
      // clamp to the valid range so the borders reuse the edge samples
      int index = i + j - range;
      if (index < 0) index = 0;
      if (index >= n) index = n - 1;
      acc += in_[index] * mask_[j];  // it's symmetric
    }
    out(i) = T(acc);
  }
}
template void gauss1d(Tensor<unsigned char, 1> &out,
                      Tensor<unsigned char, 1> &in, float sigma);
template void gauss1d(Tensor<float, 1> &out, Tensor<float, 1> &in, float sigma);
/// In-place 1D Gaussian convolution.
///
/// Filters `v` into a scratch tensor with the two-tensor gauss1d overload
/// and then assigns the result back to `v`.
template <class T>
void gauss1d(Tensor<T, 1> &v, float sigma) {
  Tensor<T, 1> filtered;
  gauss1d(filtered, v, sigma);
  v = filtered;
}
template void gauss1d(Tensor<unsigned char, 1> &v, float sigma);
template void gauss1d(Tensor<float, 1> &v, float sigma);
/// Perform 2D Gaussian convolutions using a FIR filter.
///
/// The filter is applied as two 1D passes, modifying `a` in place: first
/// every slice with a fixed index along dimension 0 is filtered with `sy`,
/// then every slice with a fixed index along dimension 1 is filtered with
/// `sx`. Each 1D mask is computed to 3 sigma.
void gauss2d(TensorMap2 a, float sx, float sy) {
  Tensor<Float, 1> slice, filtered;
  const int n0 = a.dimension(0);
  const int n1 = a.dimension(1);
  for (int p = 0; p < n0; ++p) {
    slice = a.chip(p, 0);
    gauss1d(filtered, slice, sy);
    a.chip(p, 0) = filtered;
  }
  for (int q = 0; q < n1; ++q) {
    slice = a.chip(q, 1);
    gauss1d(filtered, slice, sx);
    a.chip(q, 1) = filtered;
  }
}
void gauss2d(TensorMap2 image, float sx, float sy);
/// Clamp the index `x` to the range [0, n - 1].
///
/// Negative values map to 0 and values of `n` or more map to `n - 1`.
inline int clip(int x, int n) {
  return x < 0 ? 0 : (x >= n ? n - 1 : x);
}
/// Sample `a` at the fractional position (x, y) with bilinear interpolation.
///
/// The four neighbouring grid points are fetched with their indices clamped
/// by clip(), so positions outside the array reuse the border values.
inline Float bilin(TensorMap2 a, float x, float y) {
  const int nx = a.dimension(0);
  const int ny = a.dimension(1);
  const int x0 = static_cast<int>(floor(x));
  const int y0 = static_cast<int>(floor(y));
  // fractional offsets inside the cell
  const float fx = x - x0;
  const float fy = y - y0;
  const int xa = clip(x0, nx);
  const int xb = clip(x0 + 1, nx);
  const int ya = clip(y0, ny);
  const int yb = clip(y0 + 1, ny);
  const float v00 = a(xa, ya);
  const float v01 = a(xa, yb);
  const float v10 = a(xb, ya);
  const float v11 = a(xb, yb);
  // interpolate along y on both sides, then blend the two along x
  const double near_side = (1.0 - fy) * v00 + fy * v01;
  const double far_side = (1.0 - fy) * v10 + fy * v11;
  return ((1.0 - fx) * near_side + fx * far_side);
}
/// Normalizer that passes the line through unchanged.
struct NoNormalizer : INormalizer {
  /// Nothing is measured; the argument is ignored.
  void measure(TensorMap2 /*line*/) {}

  /// Copy `in` to `out`. The input is expected to already have
  /// `target_height` as its second dimension (checked with assert only).
  void normalize(Tensor2 &out, TensorMap2 in) {
    assert(in.dimension(1) == target_height);
    out = in;
  }
};
struct MeanNormalizer : INormalizer {
double y_mean = -1;
double y_mad = -1;
void getparams(bool verbose) {
vscale = getrenv("norm_vscale", 1.0);
range = getrenv("norm_range", 1.0);
if (verbose) print("mean_normalizer", range, vscale);
}
void measure(TensorMap2 line) {
{
double sy = 0, s1 = 0;
for (int i = 0; i < line.dimension(0); i++) {
for (int j = 0; j < line.dimension(1); j++) {
sy += line(i, j) * j;
s1 += line(i, j);
}
}
y_mean = sy / s1;
}
{
double sy = 0, s1 = 0;
for (int i = 0; i < line.dimension(0); i++) {
for (int j = 0; j < line.dimension(1); j++) {
sy += line(i, j) * fabs(j - y_mean);
s1 += line(i, j);
}
}
y_mad = sy / s1;
}
}
void normalize(Tensor2 &out, TensorMap2 in) {
float actual = vscale * 2 * range * y_mad;
float scale = actual / target_height;
cerr << "normalize: " << y_mean << " " << y_mad << " " << actual << endl;
int nw = int(in.dimension(0) / scale);
int nh = target_height;
out.resize(nw, nh);
for (int i = 0; i < nw; i++) {
for (int j = 0; j < nh; j++) {
float x = scale * i;
float y = scale * (j - target_height / 2) + y_mean;
out(i, j) = bilin(in, x, y);
}
}
}
};
/// For every index i along dimension 0 of `a`, store in m(i) the index j
/// along dimension 1 at which a(i, j) is largest.
///
/// `m` is resized to a.dimension(0). When several entries tie, the highest
/// index wins, because a candidate replaces the current best unless it is
/// strictly smaller.
void argmax1(Tensor<float, 1> &m, TensorMap2 a) {
  m.resize(a.dimension(0));
  const int outer = a.dimension(0);
  const int inner = a.dimension(1);
  for (int i = 0; i < outer; ++i) {
    float best_value = a(i, 0);
    float best_index = 0;
    for (int j = 1; j < inner; ++j) {
      if (!(a(i, j) < best_value)) {
        best_value = a(i, j);
        best_index = j;
      }
    }
    m(i) = best_index;
  }
}
/// Add a small, decaying trail of `line` to `smooth`.
///
/// For each fixed second index, a running value is carried along dimension
/// 0: it decays by a factor of 0.9 per step and gains line(i, j). The
/// running value, capped at 1.0 and scaled by 1e-3, is added to
/// smooth(i, j).
inline void add_smear(TensorMap2 smooth, TensorMap2 line) {
  const int width = line.dimension(0);
  const int height = line.dimension(1);
  for (int y = 0; y < height; ++y) {
    double trail = 0.0;
    for (int x = 0; x < width; ++x) {
      trail = trail * 0.9 + line(x, y);
      smooth(x, y) += fmin(1.0, trail) * 1e-3;
    }
  }
}
struct CenterNormalizer : INormalizer {
pytensor::PyServer *py = 0;
Tensor<float, 1> center;
float r = -1;
void setPyServer(void *p) { this->py = (pytensor::PyServer *)p; }
void getparams(bool verbose) {
range = getrenv("norm_range", 4.0);
smooth2d = getrenv("norm_smooth2d", 1.0);
smooth1d = getrenv("norm_smooth1d", 0.3);
if (verbose) print("center_normalizer", range, smooth2d, smooth1d);
}
void measure(TensorMap2 line) {
Tensor2 smooth, smooth2;
int w = line.dimension(0);
int h = line.dimension(1);
smooth = line;
gauss2d(smooth(), h * smooth2d, h * 0.5);
add_smear(smooth(), line); // just to avoid singularities
Tensor<float, 1> a(w);
argmax1(a, smooth());
gauss1d(center, a, h * smooth1d);
float s1 = 0.0;
float sy = 0.0;
for (int i = 0; i < w; i++) {
for (int j = 0; j < h; j++) {
s1 += line(i, j);
sy += line(i, j) * fabs(j - center(i));
}
}
float mad = sy / s1;
r = int(range * mad + 1);
if (py) {
#ifdef FIXME
print("r", r);
py->eval("ion(); clf()");
py->eval("subplot(211)");
py->imshowT(line, "cmap=cm.gray,interpolation='nearest'");
py->eval("subplot(212)");
py->imshowT(smooth, "cmap=cm.gray,interpolation='nearest'");
py->plot(center);
py->eval("print ginput(999)");
#endif
}
}
void normalize(Tensor2 &out, TensorMap2 in) {
int w = in.dimension(0);
if (w != center.dimension(0)) THROW("measure doesn't match normalize");
float scale = (2.0 * r) / target_height;
int target_width = max(int(w / scale), 1);
out.resize(target_width, target_height);
for (int i = 0; i < out.dimension(0); i++) {
for (int j = 0; j < out.dimension(1); j++) {
float x = scale * i;
float y = scale * (j - target_height / 2) + center(int(x));
out(i, j) = bilin(in, x, y);
}
}
}
};
/// Create a NoNormalizer with `new`; presumably the caller takes ownership.
INormalizer *make_NoNormalizer() {
  INormalizer *normalizer = new NoNormalizer();
  return normalizer;
}
/// Create a MeanNormalizer with `new`; presumably the caller takes ownership.
INormalizer *make_MeanNormalizer() {
  INormalizer *normalizer = new MeanNormalizer();
  return normalizer;
}
/// Create a CenterNormalizer with `new`; presumably the caller takes
/// ownership.
INormalizer *make_CenterNormalizer() {
  INormalizer *normalizer = new CenterNormalizer();
  return normalizer;
}
/// Create a normalizer by name.
///
/// Recognized names are "none", "mean" and "center"; any other name raises
/// an error through THROW.
INormalizer *make_Normalizer(const string &name) {
  INormalizer *result = 0;
  if (name == "none") {
    result = make_NoNormalizer();
  } else if (name == "mean") {
    result = make_MeanNormalizer();
  } else if (name == "center") {
    result = make_CenterNormalizer();
  } else {
    THROW("unknown normalizer name");
  }
  return result;
}
// PNG I/O (taken from iulib)
// Evaluate X once; when it is false, raise an error through THROW whose
// message is "CHECK: " followed by the source text of X. The do/while(0)
// wrapper lets the macro be used like a single statement.
#define CHECK_CONDITION(X) \
do { \
if (!(X)) THROW("CHECK: " #X); \
} while (0)
// Same as CHECK_CONDITION, but the message is prefixed with "CHECK_ARG: ";
// used for validating function arguments.
#define CHECK_ARG(X) \
do { \
if (!(X)) THROW("CHECK_ARG: " #X); \
} while (0)
// When true, the PNG reader and writer map file row i to y = h - i - 1
// instead of y = i, i.e. the image is mirrored vertically.
bool png_flip = false;
/// Read a PNG image from an already opened stream into an 8-bit tensor.
///
/// libpng is asked to strip 16-bit samples to 8 bits, drop alpha, unpack
/// sub-byte pixels and expand (PNG_TRANSFORM_STRIP_16 | STRIP_ALPHA |
/// PACKING | EXPAND). On return `image` has dimensions (width, height, 3)
/// and is indexed as image(x, y, channel); for single-channel input the
/// same value is written to all three channels. When `png_flip` is set,
/// file row i is stored at y = height - i - 1.
///
/// @param image destination tensor; resized by this function.
/// @param fp    stream to read from; it is not closed here.
///
/// Errors are raised through THROW / CHECK_CONDITION: a null stream, a
/// libpng allocation or decoding failure, an unexpected number of samples
/// per pixel (anything but 1 or 3), an unsupported bit depth, or a palette
/// color type after transformation. The libpng structures are released on
/// every exit path.
void read_png(Tensor<unsigned char, 3> &image, FILE *fp) {
  if (!fp) THROW("fp not defined");

  png_structp png_ptr = NULL;
  png_infop info_ptr = NULL;
  png_infop end_info = NULL;

  // Releases whatever libpng structures exist when the function is left,
  // whether by return or by an error raised further down. It is created
  // before setjmp() so that a longjmp() from libpng never skips it.
  struct ReadStructGuard {
    png_structp *png;
    png_infop *info;
    png_infop *end;
    ~ReadStructGuard() {
      if (*png) png_destroy_read_struct(png, info, end);
    }
  };
  ReadStructGuard guard{&png_ptr, &info_ptr, &end_info};

  // Allocate the 3 data structures
  png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
  if (png_ptr == NULL) THROW("png_ptr not made");
  info_ptr = png_create_info_struct(png_ptr);
  if (info_ptr == NULL) THROW("info_ptr not made");
  end_info = png_create_info_struct(png_ptr);
  if (end_info == NULL) THROW("end_info not made");

  // Set up png setjmp error handling: libpng jumps back here on failure
  if (setjmp(png_jmpbuf(png_ptr))) THROW("internal png error");
  png_init_io(png_ptr, fp);

  // Set the transforms flags. Whatever you do here,
  // DO NOT invert binary using PNG_TRANSFORM_INVERT_MONO!!
  // PNG_TRANSFORM_STRIP_ALPHA removes the alpha channel,
  // PNG_TRANSFORM_STRIP_16 reduces 16 bit samples to 8 bit.
  const int png_transforms = PNG_TRANSFORM_STRIP_16 |
                             PNG_TRANSFORM_STRIP_ALPHA |
                             PNG_TRANSFORM_PACKING | PNG_TRANSFORM_EXPAND;
  // Do it!
  png_read_png(png_ptr, info_ptr, png_transforms, NULL);

  png_bytep *row_pointers = png_get_rows(png_ptr, info_ptr);
  const int w = png_get_image_width(png_ptr, info_ptr);
  const int h = png_get_image_height(png_ptr, info_ptr);
  const png_byte bit_depth = png_get_bit_depth(png_ptr, info_ptr);
  const png_byte color_type = png_get_color_type(png_ptr, info_ptr);
  const int spp = png_get_channels(png_ptr, info_ptr);

  // Only gray (1 sample) and RGB (3 samples) are expected after stripping
  // alpha.
  if (spp == 2) THROW("there shouldn't be 2 spp!");
  if (spp != 1 && spp != 3) THROW("there shouldn't be 4 spp!");
  /* Remove if/when this is implemented for all bit_depths */
  if (spp == 3 && bit_depth != 8) {
    fprintf(stderr, "Help: spp = 3 and depth = %d != 8\n!!", bit_depth);
    THROW("not implemented for this depth");
  }

  assert(w > 0 && h > 0);
  image.resize(w, h, 3);
  CHECK_CONDITION(color_type != PNG_COLOR_TYPE_PALETTE &&
                  color_type != PNG_COLOR_MASK_PALETTE);

  if (spp == 1) {
    CHECK_CONDITION(bit_depth == 1 || bit_depth == 8);
    for (int i = 0; i < h; i++) {
      const png_bytep rowptr = row_pointers[i];
      const int y = png_flip ? (h - i - 1) : i;
      for (int x = 0; x < w; x++) {
        int value;
        if (bit_depth == 1) {
          // packed pixels, most significant bit first
          value = (rowptr[x / 8] & (128 >> (x % 8))) ? 255 : 0;
        } else {
          value = rowptr[x];
        }
        image(x, y, 0) = value;
        image(x, y, 1) = value;
        image(x, y, 2) = value;
      }
    }
  } else {
    CHECK_CONDITION(bit_depth == 8);
    for (int i = 0; i < h; i++) {
      const png_bytep rowptr = row_pointers[i];
      const int y = png_flip ? (h - i - 1) : i;
      int k = 0;
      for (int x = 0; x < w; x++) {
        image(x, y, 0) = rowptr[k++];
        image(x, y, 1) = rowptr[k++];
        image(x, y, 2) = rowptr[k++];
      }
    }
  }
}
/// Write an 8-bit tensor to an already opened stream as an RGB PNG.
///
/// `image` is indexed as image(x, y, channel) with dimensions
/// (width, height, d). With d == 3 the channels are written as R, G, B; with
/// d == 1 the single channel is replicated into all three. When `png_flip`
/// is set, file row i is taken from y = height - i - 1.
///
/// @param fp    stream to write to; it is not closed here.
/// @param image pixel data; the last dimension must be 1 or 3.
///
/// Errors are raised through THROW / CHECK_ARG: an unsupported channel
/// count, a null stream, or a libpng allocation or encoding failure.
void write_png(FILE *fp, Tensor<unsigned char, 3> &image) {
  // NOTE(review): these are handed to png_set_pHYs as pixels per meter
  // (PNG_RESOLUTION_METER); if 300 was meant as dpi it needs converting -
  // confirm before changing, since it alters the written metadata.
  const unsigned int default_xres = 300;
  const unsigned int default_yres = 300;

  const int w = image.dimension(0);
  const int h = image.dimension(1);
  const int d = image.dimension(2);
  // The row loop below supports exactly these two layouts.
  CHECK_ARG(d == 1 || d == 3);
  if (!fp) THROW("stream not open");

  // The row buffer owns heap memory, so it is created and sized before
  // setjmp(): a longjmp() out of libpng must not jump over the construction
  // of an object with a non-trivial destructor.
  Tensor<unsigned char, 1> rowbuffer;
  rowbuffer.resize(3 * w);

  /* Allocate the 2 data structures */
  png_structp png_ptr =
      png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
  if (png_ptr == NULL) THROW("png_ptr not made");
  png_infop info_ptr = png_create_info_struct(png_ptr);
  if (info_ptr == NULL) {
    png_destroy_write_struct(&png_ptr, (png_infopp)NULL);
    THROW("info_ptr not made");
  }
  /* Set up png setjmp error handling: libpng jumps back here on failure */
  if (setjmp(png_jmpbuf(png_ptr))) {
    png_destroy_write_struct(&png_ptr, &info_ptr);
    THROW("internal png error");
  }
  png_init_io(png_ptr, fp);
  png_set_IHDR(png_ptr, info_ptr, w, h, 8, PNG_COLOR_TYPE_RGB,
               PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE,
               PNG_FILTER_TYPE_BASE);
  png_set_pHYs(png_ptr, info_ptr, default_xres, default_yres,
               PNG_RESOLUTION_METER);
  png_write_info(png_ptr, info_ptr);

  for (int i = 0; i < h; i++) {
    const int y = png_flip ? (h - i - 1) : i;
    int k = 0;
    for (int x = 0; x < w; x++) {
      if (d == 1) {
        const int value = image(x, y, 0);
        rowbuffer(k++) = value;
        rowbuffer(k++) = value;
        rowbuffer(k++) = value;
      } else {
        rowbuffer(k++) = image(x, y, 0);
        rowbuffer(k++) = image(x, y, 1);
        rowbuffer(k++) = image(x, y, 2);
      }
    }
    png_byte *p = rowbuffer.data();
    png_write_rows(png_ptr, &p, 1);
  }
  png_write_end(png_ptr, info_ptr);
  png_destroy_write_struct(&png_ptr, &info_ptr);
}
/// Clamp `value` to the interval [lo, hi].
///
/// The lower bound is tested first, so it wins when lo > hi. A value that
/// compares neither below `lo` nor above `hi` is returned unchanged.
inline double clip(double value, double lo, double hi) {
  if (value < lo) return lo;
  if (value > hi) return hi;
  return value;
}
/// Read the PNG file `name` into a 2D tensor of gray values.
///
/// The file is decoded with the stream-based read_png overload. `image` is
/// resized to (width, height); each element is the mean of the three
/// decoded channels divided by 255.
///
/// @param image destination tensor; resized by this function.
/// @param name  path of the file to read.
///
/// Raises "error on open" through THROW when the file cannot be opened;
/// decoding errors come from the stream-based overload. The file is closed
/// on every exit path.
void read_png(Tensor2 &image, const char *name) {
  Tensor<unsigned char, 3> temp;
  {
    // "rb": PNG is binary data. The owning pointer closes the stream even
    // when decoding raises an error.
    std::unique_ptr<FILE, int (*)(FILE *)> stream(fopen(name, "rb"), fclose);
    if (!stream) THROW("error on open");
    read_png(temp, stream.get());
  }
  const int w = temp.dimension(0);
  const int h = temp.dimension(1);
  const bool single_channel = (temp.dimension(2) == 1);
  image.resize(w, h);
  for (int i = 0; i < w; i++) {
    for (int j = 0; j < h; j++) {
      if (single_channel)
        image(i, j) = temp(i, j, 0);
      else
        image(i, j) =
            (temp(i, j, 0) + temp(i, j, 1) + temp(i, j, 2)) / (3 * 255.0);
    }
  }
}
/// Write a 2D tensor of gray values to the PNG file `name`.
///
/// Each element is multiplied by 256, clamped to [0, 255.999999] and
/// truncated to an 8-bit value that is written to all three RGB channels
/// through the stream-based write_png overload.
///
/// @param name  path of the file to create or overwrite.
/// @param image gray values indexed as image(x, y).
///
/// Raises "error on open" through THROW when the file cannot be opened;
/// encoding errors come from the stream-based overload. The file is closed
/// on every exit path.
void write_png(const char *name, TensorMap2 image) {
  Tensor<unsigned char, 3> temp;
  temp.resize(image.dimension(0), image.dimension(1), 3);
  const int w = temp.dimension(0);
  const int h = temp.dimension(1);
  for (int i = 0; i < w; i++) {
    for (int j = 0; j < h; j++) {
      unsigned char value = floor(clip(image(i, j) * 256, 0.0, 255.999999));
      temp(i, j, 0) = value;
      temp(i, j, 1) = value;
      temp(i, j, 2) = value;
    }
  }
  // "wb": PNG is binary data. The owning pointer closes the stream even
  // when encoding raises an error.
  std::unique_ptr<FILE, int (*)(FILE *)> stream(fopen(name, "wb"), fclose);
  if (!stream) THROW("error on open");
  write_png(stream.get(), temp);
}
}