-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathbitvector.h
1328 lines (1224 loc) · 42.3 KB
/
bitvector.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// $Id$
// Author: John Wu <John.Wu at ACM.org>
// Lawrence Berkeley National Laboratory
// Copyright 2000-2014 the Regents of the University of California
#ifndef BITVECTOR_H
#define BITVECTOR_H
///@file
/// Definition of Word-Aligned Hybrid code.
#include "array_t.h" // alternative to std::vector
#if defined(_MSC_VER) && defined(_WIN32)
//disable warnings on extern before template instantiation
#pragma warning (disable : 4231)
#endif
#if defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)) && defined(CXX_USE_DLL) && !defined(DLL_EXPORT)
extern template class FASTBIT_CXX_DLLSPEC ibis::array_t<uint32_t>;
#endif
/**
@brief A data structure to represent a sequence of bits.
Key features
A bitvector object stores a sequence of bits and provides fast bitwise
logical operations. In addition, it supports operations to append new
bits from the end, read bits at arbitrary location and set bits at
arbitrary location. It also supports an iterator, a const_iterator and an
indexSet.
Encoding format <http://lbl.gov/~kwu/ps/LBNL-49626.html>
<http://portal.acm.org/citation.cfm?doid=1132863.1132864>
Incoming bits are organized into words (bitvector::word_t). A word is a
literal word if its Most Significant Bit (MSB) is 0, it is a fill word if
its MSB is 1. A literal word stores literal bit values in the bit
position following the MSB and a fill word stores a sequence of
consecutive bits that are of the same value, i.e., a fill. The second
most significant bit of the fill word is the bit value, the remaining bits
of the word is a unsigned integer that stores the length of the fill as
number of equivalent literal words, i.e., how many literal words it will
take if the fill is stored in literal words.
Restrictions
<ul>
<li> The number of bits must be expressible by one single
bitvector::word_t. This ensures that a fill word can store a fill of
any valid length without performing a bound check. If
bitvector::word_t is 32-bit long, the maximum number of bits that can
be represented by a bitvector object is 4 billion.
<li> When adding a bit with bitvector::operator+=, the integer value passed
in must be one of 0 or 1. Since checking whether the import value is 0
or not 0 causes pipeline bubble in CPU, we have opted for not performing
the check. An input value other than 0 or 1 will cause unrelated bits
to be modified and producing an incorrect bitvector.
</ul>
@ingroup FastBitIBIS
*/
class FASTBIT_CXX_DLLSPEC ibis::bitvector {
public:
typedef uint32_t word_t;///!< The basic unit of data storage.
/// Destructor.
~bitvector() {clear();};
// constructors of bitvector class
bitvector();
bitvector(const bitvector& bv);
explicit bitvector(const array_t<word_t>& arr);
explicit bitvector(const char* file);
inline const bitvector& operator=(const bitvector& bv);
inline bitvector& copy(const bitvector& bv);
inline bitvector& swap(bitvector& bv);
// use bv to replace part of the existing value, match the ith bit with
// the first of bv, return reference to self
//bitvector& copy(const word_t i, const bitvector& bv);
void setBit(const word_t i, int val);
int getBit(const word_t i) const;
inline void turnOnRawBit(const word_t i);
void erase(word_t i, word_t j);
void set(int val, word_t n);
void clear();
bitvector& operator+=(const bitvector& bv);
inline bitvector& operator+=(int b);
void appendWord(word_t w);
inline void appendFill(int val, word_t n);
int operator==(const bitvector& rhs) const;
void flip();
///@brief Perform bitwise AND between this bitvector and @c rhs.
void operator&=(const bitvector& rhs);
///@brief Perform bitwise AND between this bitvector and @c rhs, return
/// the result as a new bitvector.
bitvector* operator&(const bitvector&) const;
///@brief Perform bitwise OR.
void operator|=(const bitvector& rhs);
///@brief Perform bitwise OR and return the result as a new bitvector.
bitvector* operator|(const bitvector&) const;
///@brief Perform bitwise exclusive or (XOR).
void operator^=(const bitvector& rhs);
///@brief Perform bitwise XOR and return the result as a new bitvector.
bitvector* operator^(const bitvector&) const;
///@brief Perform bitwise subtraction (a & !b).
void operator-=(const bitvector& rhs);
///@brief Perform bitwise subtraction and return the result as a new
/// bitvector.
bitvector* operator-(const bitvector&) const;
bool operator<(const bitvector&) const;
void subset(const bitvector& mask, bitvector& res) const;
word_t count(const bitvector& mask) const;
// I/O functions.
void read(const char *fn);
void write(const char *fn) ;
void write(int fdes) ;
void write(array_t<word_t>& arr) const;
void decompress_icx(int begin, int end);
void compress();
void compress_icx();
void decompress();
void decompress2();
word_t compressible() const;
/// Does this bit vector use less space than the maximum? Return true
/// if yes, otherwise false.
bool isCompressed() const {return (m_vec.size()*MAXBITS < nbits);}
inline word_t size() const throw();
inline void sloppySize(word_t n) const;
inline word_t cnt() const;
inline word_t count() const {return cnt();}
inline word_t sloppyCount() const;
inline word_t numFillWords() const;
/// Return the number of bytes used by the bitvector object in memory.
uint32_t bytes() const throw() {
return (m_vec.size()*sizeof(word_t) + sizeof(bitvector));
};
/// Compute the number of bytes in the serialized version of this
/// bitvector object. This would be the number of bytes this bitvector
/// needs on disk or in an array_t<word_t>.
uint32_t getSerialSize() const throw() {
return (m_vec.size() + 1 + (active.nbits>0)) * sizeof(word_t);
};
/// Return the number of bits in a literal word.
static word_t bitsPerLiteral() {return MAXBITS;}
inline static double randomSize(word_t nb, word_t nc);
inline static double markovSize(word_t nb, word_t nc, double f);
static double clusteringFactor(word_t nb, word_t nc, word_t sz);
void adjustSize(word_t nv, word_t nt);
void reserve(unsigned nb, unsigned nc, double cf=0.0);
/// Is the bitvector empty? For efficiency reasons, this funciton
/// only works correctly on a properly compressed bitvector.
bool empty() const {return all0s() && active.val == 0;}
/// The print function.
std::ostream& print(std::ostream &) const;
/// Iterator that supports modification of individual bit.
class iterator;
inline iterator end();
inline iterator begin();
/// The read-only iterator.
class const_iterator;
inline const_iterator end() const;
inline const_iterator begin() const;
/// A data structure to provide the position of bits that are one.
class indexSet;
inline indexSet firstIndexSet() const;
/// An iterator over the positions that are one.
class pit;
// give accesses to some friends
friend class indexSet;
friend class iterator;
friend class const_iterator;
protected:
inline bool all0s() const;
inline bool all1s() const;
private:
// static members, i.e., constants to be used internally
static const unsigned MAXBITS;
static const unsigned SECONDBIT;
static const word_t FILLBIT;
static const word_t HEADER0;
static const word_t HEADER1;
static const word_t HEADER0_ICX;
static const word_t HEADER1_ICX;
static const word_t HEADER_0F_ICX;
static const word_t HEADER_1F_ICX; //The 2 variables are used for 0-Fill and 1-Fill
static const word_t HEADER_0L_F_0L_ICX;
static const word_t HEADER_0L_F_1L_ICX;
static const word_t HEADER_1L_F_0L_ICX;
static const word_t HEADER_1L_F_1L_ICX;// These are additional codes in the seicx
static const word_t HEADER_FLF_ICX;
static const word_t ALLONES;
static const word_t MAXCNT;
static const word_t MAXCNT_ICX;
static const word_t ICX_0_DIRTYMASK1;
static const word_t ICX_0_DIRTYMASK2;
static const word_t ICX_0_DIRTYMASK3;
static const word_t ICX_0_DIRTYMASK4;
static const word_t ICX_1_DIRTYMASK1;//The definition is used for 1-NI-L
static const word_t ICX_1_DIRTYMASK2;
static const word_t ICX_1_DIRTYMASK3;
static const word_t ICX_1_DIRTYMASK4;
static const word_t HEADER_0NL2_F_ICX;
static const word_t HEADER_L_F_ICX;
/// @brief The struct active_word stores the last few bits that do not
/// fill a whole word.
///
/// It only stores raw bit sequences. The bits are pushed
/// from the right, i.e., the newest bit is stored in the LSB (right
/// most) position.
struct active_word {
word_t val; // the value
word_t nbits; // total number of bits
active_word() : val(0), nbits(0) {};
void reset() {val = 0; nbits = 0;};
int is_full() const {return (nbits >= MAXBITS);};
/// Append a single bit. The argument must be either 0 or 1.
void append(int b) {
val <<= 1; nbits ++; val += b;
};
}; // struct active_word
/// @brief An internal struct used during logical operations to track
/// the usage of fill words.
struct run {
int isFill;
int fillBit;
word_t nWords;
array_t<word_t>::const_iterator it;
run() : isFill(0), fillBit(0), nWords(0), it(0) {};
void decode() { ///!< Decode the word pointed by @c it.
fillBit = (*it > HEADER1);
if (*it > ALLONES) {
nWords = (*it & MAXCNT);
isFill = 1;
}
else {
nWords = 1;
isFill = 0;
}
};
/// Reduce the run size by 1 word. Advance the iterator @c it
/// forward if necessary.
void operator--() {
if (nWords > 1) {--nWords;}
else {++ it; nWords = 0;}
};
/// Reduce the run size by @len words. Advance the iterator @c
/// it forward as necessary.
void operator-=(word_t len) {
while (len > 0) {
if (nWords == 0) decode();
if (isFill != 0) {
if (nWords > len) {nWords -= len; len = 0;}
else if (nWords == len) {nWords = 0; len = 0; ++ it;}
else {len -= nWords; ++ it; nWords = 0;}
}
else {-- len; ++ it; nWords = 0;}
}
};
};
friend struct run;
friend struct active_word;
// member variables of bitvector class
mutable word_t nbits; ///!< Number of bits in @c m_vec.
mutable word_t nset; ///!< Number of bits that are 1 in @c m_vec.
active_word active; ///!< The active word.
array_t<word_t> m_vec; ///!< Store whole words.
// private functions of bitvector class
word_t count_c1(const bitvector& mask) const;
word_t count_c2(const bitvector& mask) const;
// The following three functions all performs or operation, _c2 and _c1
// generate compressed solutions, but _c0, _d1, and _d2 generate
// uncompressed solutions.
// or_c2 assumes there are compressed word in both operands
// or_c1 *this may contain compressed word, but not rhs
// or_c0 assumes both operands are not compressed
// or_d1 *this contains no compressed word and is overwritten with the
// result
// or_d2 both *this and rhs are compressed, but res is not compressed
void or_c2(const bitvector& rhs, bitvector& res) const;
void or_c1(const bitvector& rhs, bitvector& res) const;
void or_c0(const bitvector& rhs);
void or_d1(const bitvector& rhs);
void or_d2(const bitvector& rhs, bitvector& res) const;
void and_c2(const bitvector& rhs, bitvector& res) const;
void and_c1(const bitvector& rhs, bitvector& res) const;
void and_c0(const bitvector& rhs);
void and_d1(const bitvector& rhs);
void and_d2(const bitvector& rhs, bitvector& res) const;
void xor_c2(const bitvector& rhs, bitvector& res) const;
void xor_c1(const bitvector& rhs, bitvector& res) const;
void xor_c0(const bitvector& rhs);
void xor_d1(const bitvector& rhs);
void xor_d2(const bitvector& rhs, bitvector& res) const;
void minus_c2(const bitvector& rhs, bitvector& res) const;
void minus_c1(const bitvector& rhs, bitvector& res) const;
void minus_c1x(const bitvector& rhs, bitvector& res) const;
void minus_c0(const bitvector& rhs);
void minus_d1(const bitvector& rhs);
void minus_d2(const bitvector& rhs, bitvector& res) const;
inline void copy_runs(run& it, word_t& nw); // copy nw words
inline void copy_runsn(run& it, word_t& nw); // copy nw words and negate
inline void copy_fill(array_t<word_t>::iterator& jt, run& it);
inline void copy_runs(array_t<word_t>::iterator& jt, run& it,
word_t& nw);
inline void copy_runsn(array_t<word_t>::iterator& jt, run& it,
word_t& nw);
void decompress(array_t<word_t>& tmp) const;
void copy_comp(array_t<word_t>& tmp) const;
inline void append_active();
inline void append_counter(int val, word_t cnt);
inline word_t cnt_ones(word_t) const; // number of ones in a word
inline word_t cnt_bits(word_t) const; // number of bits in a word
word_t do_cnt() const throw ();
}; // class bitvector
/// The const_iterator class. It iterates on the individual bits.
class ibis::bitvector::const_iterator {
public:
const_iterator() : compressed(0), ind(0), nbits(0), literalvalue(0),
fillbit(0), active(0) {}
const_iterator(const const_iterator& r)
: compressed(r.compressed), ind(r.ind), nbits(r.nbits),
literalvalue(r.literalvalue), fillbit(r.fillbit), active(r.active),
end(r.end), begin(r.begin), it(r.it) {};
const_iterator& operator=(const const_iterator& r) {
compressed = r.compressed; ind = r.ind; nbits = r.nbits;
literalvalue = r.literalvalue; fillbit = r.fillbit; active = r.active;
end = r.end; begin = r.begin; it = r.it;
return *this;}
inline bool operator*() const;
inline int operator!=(const const_iterator& rhs) const throw ();
inline int operator==(const const_iterator& rhs) const throw ();
inline const_iterator& operator++();
inline const_iterator& operator--();
const_iterator& operator+=(int incr);
private:
ibis::bitvector::word_t compressed;
ibis::bitvector::word_t ind;
ibis::bitvector::word_t nbits;
ibis::bitvector::word_t literalvalue;
int fillbit;
const active_word* active;
array_t<word_t>::const_iterator end;
array_t<word_t>::const_iterator begin;
array_t<word_t>::const_iterator it;
void decodeWord();
// give three functions of bitvector access to private variables
friend void ibis::bitvector::erase(word_t i, word_t j);
friend const_iterator ibis::bitvector::begin() const;
friend const_iterator ibis::bitvector::end() const;
friend class ibis::bitvector::iterator;
}; // class ibis::bitvector::const_iterator
/// The iterator that allows modification of bits. It provides only one
/// additional function (operator=) than const_iterator to allow
/// modification of the bit pointed.
/// ********************IMPORTANT********************
/// operator= modifies the content of the bitvector it points to and it can
/// invalidate other iterators or const_iterators referring to the same
/// bitvector.
class ibis::bitvector::iterator {
public:
iterator() : compressed(0), ind(0), nbits(0), literalvalue(0), fillbit(0),
bitv(0), active(0), vec(0) {}
iterator(const iterator& r)
: compressed(r.compressed), ind(r.ind), nbits(r.nbits),
literalvalue(r.literalvalue), fillbit(r.fillbit), bitv(r.bitv),
active(r.active), vec(r.vec), it(r.it) {};
const iterator& operator=(const iterator& r) {
compressed = r.compressed; ind = r.ind; nbits = r.nbits;
literalvalue = r.literalvalue; fillbit = r.fillbit; bitv = r.bitv;
active = r.active; vec = r.vec; it = r.it; return *this;}
inline bool operator*() const; // still can not modify this
inline int operator!=(const const_iterator& rhs) const throw ();
inline int operator==(const const_iterator& rhs) const throw ();
inline int operator!=(const iterator& rhs) const throw ();
inline int operator==(const iterator& rhs) const throw ();
inline iterator& operator++();
inline iterator& operator--();
iterator& operator+=(int incr);
const iterator& operator=(int val);
private:
ibis::bitvector::word_t compressed;
ibis::bitvector::word_t ind;
ibis::bitvector::word_t nbits;
ibis::bitvector::word_t literalvalue;
int fillbit;
ibis::bitvector* bitv;
active_word* active;
array_t<word_t>* vec;
array_t<word_t>::iterator it;
void decodeWord();
friend iterator ibis::bitvector::begin();
friend iterator ibis::bitvector::end();
}; // class ibis::bitvector::iterator
/// @brief The indexSet stores positions of bits that are one.
///
/// It decodes one word of the bitvector at a time. For a fill of ones,
/// the function @c isRange returns true, otherwise it returns false. If
/// isRange returns true, the position of the first bit is pointed by the
/// pointer returned by function @c indices, and there are @c nIndices
/// consecutive ones. If @c isRange returns false, there are @c nIndices
/// bits that are one and the positions of these bits are stored in the
/// array returned by function @c indices.
class FASTBIT_CXX_DLLSPEC ibis::bitvector::indexSet {
public:
/// Default constructor.
indexSet() : it(0), end(0), active(0), nind(0) {}
/// Copy constructor.
indexSet(const indexSet& rhs)
: it(rhs.it), end(rhs.end), active(rhs.active), nind(rhs.nind) {
ind[0] = rhs.ind[0];
ind[1] = rhs.ind[1];
ind[2] = rhs.ind[2];
ind[3] = rhs.ind[3];
ind[4] = rhs.ind[4];
ind[5] = rhs.ind[5];
ind[6] = rhs.ind[6];
ind[7] = rhs.ind[7];
ind[8] = rhs.ind[8];
ind[9] = rhs.ind[9];
ind[10] = rhs.ind[10];
ind[11] = rhs.ind[11];
ind[12] = rhs.ind[12];
ind[13] = rhs.ind[13];
ind[14] = rhs.ind[14];
ind[15] = rhs.ind[15];
ind[16] = rhs.ind[16];
ind[17] = rhs.ind[17];
ind[18] = rhs.ind[18];
ind[19] = rhs.ind[19];
ind[20] = rhs.ind[20];
ind[21] = rhs.ind[21];
ind[22] = rhs.ind[22];
ind[23] = rhs.ind[23];
ind[24] = rhs.ind[24];
ind[25] = rhs.ind[25];
ind[26] = rhs.ind[26];
ind[27] = rhs.ind[27];
ind[28] = rhs.ind[28];
ind[29] = rhs.ind[29];
ind[30] = rhs.ind[30];
ind[31] = rhs.ind[31];
}
/// Assignment operator.
indexSet& operator=(const indexSet& rhs) {
it = rhs.it;
end = rhs.end;
active = rhs.active;
nind = rhs.nind;
ind[0] = rhs.ind[0];
ind[1] = rhs.ind[1];
ind[2] = rhs.ind[2];
ind[3] = rhs.ind[3];
ind[4] = rhs.ind[4];
ind[5] = rhs.ind[5];
ind[6] = rhs.ind[6];
ind[7] = rhs.ind[7];
ind[8] = rhs.ind[8];
ind[9] = rhs.ind[9];
ind[10] = rhs.ind[10];
ind[11] = rhs.ind[11];
ind[12] = rhs.ind[12];
ind[13] = rhs.ind[13];
ind[14] = rhs.ind[14];
ind[15] = rhs.ind[15];
ind[16] = rhs.ind[16];
ind[17] = rhs.ind[17];
ind[18] = rhs.ind[18];
ind[19] = rhs.ind[19];
ind[20] = rhs.ind[20];
ind[21] = rhs.ind[21];
ind[22] = rhs.ind[22];
ind[23] = rhs.ind[23];
ind[24] = rhs.ind[24];
ind[25] = rhs.ind[25];
ind[26] = rhs.ind[26];
ind[27] = rhs.ind[27];
ind[28] = rhs.ind[28];
ind[29] = rhs.ind[29];
ind[30] = rhs.ind[30];
ind[31] = rhs.ind[31];
return *this;
}
//int operator!=(const indexSet& rhs) const {return (it != rhs.it);};
/// Is the index set a consecutive range?
bool isRange() const {return (nind>=ibis::bitvector::MAXBITS);}
/// Pointer to the indices.
const word_t* indices() const {return ind;};
/// Number of indices.
word_t nIndices() const {return nind;}
/// The value of the current compressed word.
const word_t& currentWord() const {return *it;}
indexSet& operator++();
// allow bitvector::firstIndexSet() to access private member variables
friend indexSet ibis::bitvector::firstIndexSet() const;
private:
array_t<word_t>::const_iterator it;
array_t<word_t>::const_iterator end;
const active_word* active; // points back to the active word
word_t nind; // number of indices
word_t ind[32];
}; // class ibis::bitvector::indexSet
/// Iterate over the positive positions one at a time. A positive position
/// is the position where a bit is 1.
///
/// This class iterates over all the positive positions. Immediately after
/// initialization, the "current" bit is the first bit that is 1.
class ibis::bitvector::pit {
public:
pit(): curr(0xFFFFFFFFU) {}
pit(const ibis::bitvector &bv) : curr(0xFFFFFFFFU) {init(bv);}
inline ibis::bitvector::word_t operator*() const;
inline void next();
inline void skip(unsigned);
inline void init(const ibis::bitvector&);
private:
ibis::bitvector::word_t curr;
ibis::bitvector::indexSet iset;
}; // class ibis::bitvector::pit
/// Explicitly set the size of the bitvector. This is intended to be used
/// by indexing functions to avoid counting the number of bits. Caller is
/// responsible for ensuring the size assigned is actually correct. It
/// does not affect the number of bits actually represented by this data
/// structure. To change the number of bits represented by this data
/// structure use the function adjustSize instead.
inline void ibis::bitvector::sloppySize(word_t n) const {
nbits = n-active.nbits;
#if defined(WAH_CHECK_SIZE)
word_t nb = do_cnt();
if (nb != nbits) {
const_cast<ibis::bitvector*>(this)->adjustSize(0, nbits);
LOGGER(ibis::gVerbose >= 0)
<< "bitvector::sloppySize -- adjust the number of bits to "
<< n;
}
#endif
} // ibis::bitvector::sloppySize
/// Provide a sloppy count of the number of bits that are 1. If it returns
/// 0, this bit vector has NO bits that are 1, otherwise, there might be
/// some bits that are 1. However, the return value not equaling to 0 does
/// not necessarily mean there are actually no bit that is 1. It simply
/// means that we can not determine whether all bits are 0 without
/// additional work. This is a sloppy version of count, it only checks the
/// active word and the first one regular word and therefore should be very
/// cheap to run. This function is more useful for situations where one
/// wants to detect an empty bit vector.
inline ibis::bitvector::word_t ibis::bitvector::sloppyCount() const {
if (nset > 0) {
return nset;
}
else if (active.nbits == 0 || active.val == 0) {
if (m_vec.empty() ||
(m_vec.size() == 1 &&
(m_vec[0] == 0 ||
(m_vec[0]>=HEADER0 && m_vec[0]<HEADER1))))
return 0;
else
return 2;
}
else {
return 1;
}
} // ibis::bitvector::sloppyCount
/// Are all bits in regular words 0? It assumes the regular words have
/// been properly compressed and therefore only need to check one word.
inline bool ibis::bitvector::all0s() const {
if (m_vec.empty()) {
return true;
}
else if (m_vec.size() == 1) {
return (m_vec[0] == 0 || (m_vec[0]>=HEADER0 && m_vec[0]<HEADER1));
}
else {
return false;
}
} // ibis::bitvector::all0s
/// Are all bits in regular words 1? It assumes the regular words are
/// properly compressed and therefore only need to examine one word.
inline bool ibis::bitvector::all1s() const {
if (m_vec.size() == 1) {
return (m_vec[0] == ALLONES || (m_vec[0] > HEADER1));
}
else {
return false;
}
} // ibis::bitvector::all1s
/// Compute the number of bits represented by a word.
inline ibis::bitvector::word_t
ibis::bitvector::cnt_bits(ibis::bitvector::word_t val) const {
return ((val>ALLONES) ? ((val&MAXCNT)*MAXBITS) : MAXBITS);
} // ibis::bitvector::cnt_bits
/// Compute the number of ones in a literal word.
inline ibis::bitvector::word_t
ibis::bitvector::cnt_ones(ibis::bitvector::word_t val) const {
// number of 1 bits in a value between 0 and 255
static const word_t table[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
return table[val&0xFFU] + table[(val>>8)&0xFFU] +
table[(val>>16)&0xFFU] + table[(val>>24)&0xFFU];
} // ibis::bitvector::cnt_ones
/// Return the total number of bits in the bit sequence.
inline ibis::bitvector::word_t ibis::bitvector::size() const throw() {
return ((nbits?nbits:(nbits=do_cnt()))+active.nbits);
} // ibis::bitvector::size
/// Return the number of bits that are one.
inline ibis::bitvector::word_t ibis::bitvector::cnt() const {
if (nset==0 && !m_vec.empty())
nbits = do_cnt();
return (nset+cnt_ones(active.val));
} // ibis::bitvector::cnt
/// Return the number of fill words.
inline ibis::bitvector::word_t ibis::bitvector::numFillWords() const {
word_t cnt=0;
array_t<ibis::bitvector::word_t>::const_iterator it = m_vec.begin();
while (it != m_vec.end()) {
cnt += (*it >> ibis::bitvector::MAXBITS);
it++;
}
return cnt;
} // inline word_t ibis::bitvector::numFillWords() const {
/// The assignment operator. Use deep copy. Wanted to use shallow copy
/// for efficiency considerations, but SHALLOW copy causes unexpected
/// problem in test program bitty.cpp.
inline const ibis::bitvector&
ibis::bitvector::operator=(const ibis::bitvector& bv) {
nbits = bv.nbits; nset = bv.nset; active = bv.active;
m_vec.deepCopy(bv.m_vec);
return *this;
}
/// Make a copy. Performs a deep copy.
inline ibis::bitvector& ibis::bitvector::copy(const ibis::bitvector& bv) {
nbits = bv.nbits; nset = bv.nset; active = bv.active;
m_vec.deepCopy(bv.m_vec);
return *this;
}
inline ibis::bitvector& ibis::bitvector::swap(bitvector& bv) {
word_t tmp;
tmp = bv.nbits; bv.nbits = nbits; nbits = tmp;
tmp = bv.nset; bv.nset = nset; nset = tmp;
active_word atmp = bv.active;
bv.active = active; active = atmp;
m_vec.swap(bv.m_vec);
return *this;
}
/// A private function called to make the active word part of the regular
/// words. It assumes that nbits == MAXBITS and refers to MAXBITS instead
/// of @c nbits.
inline void ibis::bitvector::append_active() {
// std::cout << "before: active.val = " << std::hex << active.val;
// if (m_vec.size())
// std::cout << ", m_vec.back() = " << m_vec.back();
// std::cout << std::dec << std::endl;
if (m_vec.empty()) {
m_vec.push_back(active.val);
}
else if (active.val == 0) {// incoming word is zero
if (m_vec.back() == 0) {
m_vec.back() = (HEADER0 + 2);
}
else if (m_vec.back() >= HEADER0 && m_vec.back() < HEADER1) {
++ m_vec.back();
}
else {
m_vec.push_back(active.val);
}
}
else if (active.val == ALLONES) {// incoming word is allones
if (m_vec.back() == ALLONES) {
m_vec.back() = (HEADER1 | 2);
}
else if (m_vec.back() >= HEADER1) {
++m_vec.back();
}
else {
m_vec.push_back(active.val);
}
}
else { // incoming word contains a mixture of bits
m_vec.push_back(active.val);
}
nbits += MAXBITS;
active.reset();
nset = 0;
// std::cout << "after: m_vec.back() = " << std::hex << m_vec.back()
// << std::dec << std::endl;
} // ibis::bitvector::append_active
/// A private function to append a single counter when the active word is
/// empty. The value of @c cnt is assumed to be greater than 0.
inline void ibis::bitvector::append_counter(int val, word_t cnt) {
word_t head = 2 + val;
word_t w = (head << SECONDBIT) + cnt;
nbits += cnt*MAXBITS;
if (m_vec.empty()) {
m_vec.push_back(w);
}
else if ((m_vec.back()>>SECONDBIT) == head) {
m_vec.back() += cnt;
}
else if ((m_vec.back()==ALLONES) && head==3) {
m_vec.back() = w + 1;
}
else if ((m_vec.back() == 0) && head==2) {
m_vec.back() = w + 1;
}
else {
m_vec.push_back(w);
}
} // ibis::bitvector::append_counter
/// Append a single bit. The incoming value must be 0 or 1.
inline ibis::bitvector& ibis::bitvector::operator+=(int b) {
active.append(b);
if (active.is_full())
append_active();
return *this;
} // ibis::bitvector::operator+=
/// Append @c n bits of @c val. The value @c n may be arbitrary integer as
/// long as the resulting size is still representable by a
/// ibis::bitvector::word_t, however, the value @c val must be either 0 or
/// 1.
inline void ibis::bitvector::appendFill(int val, word_t n) {
if (n == 0) return;
if (active.nbits > 0) {
word_t tmp = (MAXBITS - active.nbits);
if (tmp > n) tmp = n;
active.nbits += tmp;
active.val <<= tmp;
n -= tmp;
if (val != 0)
active.val |= (1U<<tmp) - 1;
if (active.nbits >= MAXBITS)
append_active();
}
if (n >= MAXBITS) {
word_t cnt = n / MAXBITS;
if (cnt > 1)
append_counter(val, cnt);
else if (val != 0) {
active.val = ALLONES;
append_active();
}
else {
active.val = 0;
append_active();
}
n -= cnt * MAXBITS;
}
if (n > 0) {
active.nbits = n;
active.val = val*((1U<<n)-1);
}
} // ibis::bitvector::appendFill
/// Copy a group of consecutive runs. It appends nw words starting from
/// 'it' to the current bit vector, assuming active is empty. Both it and
/// nw are modified in this function. On returning from this function, it
/// points to the next unused word and nw stores the value of remaining
/// words to copy.
inline void ibis::bitvector::copy_runs(run& it, word_t& nw) {
// deal with the first word -- attach it to the last word in m_vec
if (it.isFill != 0) {
if (it.nWords > 1) {
append_counter(it.fillBit, it.nWords);
nw -= it.nWords;
}
else if (it.nWords == 1) {
active.val = (it.fillBit != 0 ? ALLONES : 0);
append_active();
-- nw;
}
}
else {
active.val = *(it.it);
append_active();
-- nw;
}
++ it.it;
nset = 0;
it.nWords = 0;
nbits += MAXBITS * nw;
while (nw > 0) { // copy the words
it.decode();
if (nw >= it.nWords) {
m_vec.push_back(*(it.it));
nw -= it.nWords;
it.nWords = 0;
++ it.it;
}
else {
break;
}
}
nbits -= MAXBITS * nw;
} // ibis::bitvector::copy_runs
/// Copy the complements of a set of consecutive runs. It assumes
/// active to be empty.
inline void ibis::bitvector::copy_runsn(run& it, word_t& nw) {
// deal with the first word -- need to attach it to the last word in m_vec
if (it.isFill != 0) {
if (it.nWords > 1) {
append_counter(!it.fillBit, it.nWords);
nw -= it.nWords;
}
else if (it.nWords == 1) {
active.val = (it.fillBit != 0 ? 0 : ALLONES);
append_active();
-- nw;
}
}
else {
active.val = ALLONES ^ *(it.it);
append_active();
-- nw;
}
++ it.it; // advance to the next word
nset = 0;
it.nWords = 0;
nbits += MAXBITS * nw;
while (nw > 0) { // copy the words
it.decode();
if (nw >= it.nWords) {
m_vec.push_back((it.isFill?FILLBIT:ALLONES) ^ *(it.it));
nw -= it.nWords;
it.nWords = 0;
++ it.it;
}
else {
break;
}
}
nbits -= MAXBITS * nw;
} // ibis::bitvector::copy_runsn
/// Copy the fill in "run it" as literal words.
/// This implementation relies on the fact that the iterator jt is actually
/// a bare pointer.
inline void ibis::bitvector::copy_fill
(array_t<ibis::bitvector::word_t>::iterator& jt, run& it) {
if (it.fillBit == 0) {
while (it.nWords > 3) {
*jt = 0;
jt[1] = 0;
jt[2] = 0;
jt[3] = 0;
jt += 4;
it.nWords -= 4;
}
if (it.nWords == 1) {
*jt = 0; ++jt;
}
else if (it.nWords == 2) {
*jt = 0; jt[1] = 0; jt += 2;
}
else if (it.nWords == 3) {
*jt = 0; jt[1] = 0; jt[2] = 0; jt += 3;
}
}
else {
while (it.nWords > 3) {
*jt = ALLONES;
jt[1] = ALLONES;
jt[2] = ALLONES;
jt[3] = ALLONES;
jt += 4;
it.nWords -= 4;
}
if (it.nWords == 1) {
*jt = ALLONES; ++jt;
}
else if (it.nWords == 2) {
*jt = ALLONES; jt[1] = ALLONES; jt += 2;
}
else if (it.nWords == 3) {
*jt = ALLONES; jt[1] = ALLONES; jt[2] = ALLONES; jt += 3;
}
}
it.nWords = 0;
++ it.it;
} // ibis::bitvector::copy_fill
/// Copy the next nw words (nw * MAXBITS bits) starting with run it
/// to an array_t as uncompressed words. If the run has more words than nw,
/// return the left over words to give it a chance for the longer run to be
/// counted first.
inline void ibis::bitvector::copy_runs
(array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) {
while (nw >= it.nWords && nw > 0) {
if (it.isFill != 0) { // copy a fill
const array_t<word_t>::iterator iend = jt + it.nWords;
if (it.fillBit == 0) {
while (jt < iend) {
*jt = 0;
++ jt;
}
}
else {
while (jt < iend) {
*jt = ALLONES;
++ jt;
}
}
nw -= it.nWords;
}
else { // copy a single word
*jt = *(it.it);
++ jt;
-- nw;
}
++ it.it; // advance to the next word
if (nw > 0) {
it.decode();
}
else {
it.nWords = 0;
return;
}
}
} // ibis::bitvector::copy_runs
/// Copy the complements of the next nw words (nw * MAXBITS bits)
/// starting with "run it" as uncompressed words.
inline void ibis::bitvector::copy_runsn
(array_t<ibis::bitvector::word_t>::iterator& jt, run& it, word_t& nw) {
while (nw >= it.nWords) {