Skip to content
This repository has been archived by the owner on Apr 10, 2024. It is now read-only.

New norm disk format for zap #27

Merged
merged 3 commits into from
Oct 1, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (
"github.com/couchbase/vellum"
)

const Version uint32 = 14
const Version uint32 = 15

const Type string = "zap"

Expand Down
2 changes: 1 addition & 1 deletion merge.go
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *Po
newRoaring.Add(uint32(hitNewDocNum))

nextFreq := next.Frequency()
nextNorm := uint64(math.Float32bits(float32(next.Norm())))
nextNorm := fieldLenFromNorm(next.Norm())

locs := next.Locations()

Expand Down
22 changes: 22 additions & 0 deletions merge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -870,3 +870,25 @@ func TestUnder32Bits(t *testing.T) {
t.Errorf("under32Bits wrong")
}
}

// TestEncodeDecodeFieldLenAndNorm checks that converting a field length to
// a norm and back again returns the original field length.
func TestEncodeDecodeFieldLenAndNorm(t *testing.T) {
	// The round trip goes through floating point, so it is verified for
	// every field length from 1 up to a reasonable upper bound of 2400.
	const maxFieldLen = 2400

	for want := uint64(1); want <= maxFieldLen; want++ {
		norm := normFromFieldLen(want)
		if got := fieldLenFromNorm(norm); got != want {
			t.Errorf("Field length for norm: %v, expected: %d, got %d",
				norm, want, got)
		}
	}
}
6 changes: 3 additions & 3 deletions new.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ type interimStoredField struct {

type interimFreqNorm struct {
freq uint64
norm float32
norm uint64
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed, can leave this float32.

numLocs int
}

Expand Down Expand Up @@ -456,7 +456,7 @@ func (s *interim) processDocument(docNum uint64,
// now that it's been rolled up into fieldTFs, walk that
for fieldID, tfs := range fieldTFs {
dict := s.Dicts[fieldID]
norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))
norm := uint64(fieldLens[fieldID])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we can use

norm := math.Float32frombits(fieldLens[fieldID])


for term, tf := range tfs {
pid := dict[term] - 1
Expand Down Expand Up @@ -669,7 +669,7 @@ func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err

err = tfEncoder.Add(docNum,
encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0),
uint64(math.Float32bits(freqNorm.norm)))
freqNorm.norm)
if err != nil {
return 0, nil, err
}
Expand Down
18 changes: 14 additions & 4 deletions posting.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func under32Bits(x uint64) bool {

const DocNum1HitFinished = math.MaxUint64

var NormBits1Hit = uint64(math.Float32bits(float32(1)))
var NormBits1Hit = uint64(1)

// PostingsList is an in-memory representation of a postings list
type PostingsList struct {
Expand Down Expand Up @@ -479,7 +479,7 @@ func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, err
return nil, err
}

rv.norm = math.Float32frombits(uint32(normBits))
rv.norm = normFromFieldLen(normBits)

if i.includeLocs && hasLocs {
// prepare locations into reused slices, where we assume
Expand Down Expand Up @@ -722,7 +722,7 @@ func PostingsIteratorFrom1Hit(docNum1Hit uint64,
type Posting struct {
docNum uint64
freq uint64
norm float32
norm float64
locs []segment.Location
}

Expand All @@ -748,7 +748,7 @@ func (p *Posting) Frequency() uint64 {

// Norm returns the normalization factor for this posting
func (p *Posting) Norm() float64 {
return float64(p.norm)
return p.norm
}

// Locations returns the location information for each occurrence
Expand Down Expand Up @@ -796,3 +796,13 @@ func (l *Location) Pos() uint64 {
func (l *Location) ArrayPositions() []uint64 {
return l.ap
}

// fieldLenFromNorm recovers a field length from its norm, inverting
// normFromFieldLen: since norm = 1/sqrt(fieldLen), fieldLen = 1/norm^2.
//
// The quotient is rounded to the nearest integer rather than truncated, so
// the small error introduced when the norm was narrowed to float32 is
// absorbed. A float32 carries a relative error of at most 2^-24, which
// becomes at most fieldLen * 2^-23 after squaring; the round trip is
// therefore exact for field lengths below 2^22 (about 4 million).
//
// Norms that have no representable field length (zero, NaN, or values so
// small that the result overflows uint64) yield 0 instead of relying on
// Go's implementation-dependent out-of-range float-to-integer conversion.
func fieldLenFromNorm(norm float64) uint64 {
	fieldLen := math.Round(1 / (norm * norm))
	if math.IsNaN(fieldLen) || fieldLen >= math.MaxUint64 {
		return 0
	}
	return uint64(fieldLen)
}

// normFromFieldLen converts a field length into its norm, 1/sqrt(fieldLen).
// The value is narrowed to float32 and then widened back to float64, so the
// returned norm is always exactly representable as a float32.
func normFromFieldLen(fieldLen uint64) float64 {
	root := math.Sqrt(float64(fieldLen))
	return float64(float32(1.0 / root))
}