Skip to content

Commit

Permalink
crencoding: add UvarintLen functions
Browse files Browse the repository at this point in the history
Add functions to efficiently calculate the length of uvarint
encodings.
  • Loading branch information
RaduBerinde committed Nov 1, 2024
1 parent ddcdee8 commit 0564491
Show file tree
Hide file tree
Showing 3 changed files with 238 additions and 0 deletions.
43 changes: 43 additions & 0 deletions crencoding/var_int.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright 2024 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

package crencoding

import "math/bits"

// UvarintLen32 reports how many bytes the encoding/binary uvarint encoding of
// x occupies.
//
// The result always equals len(binary.AppendUvarint(nil, uint64(x))), but it
// is computed with a handful of arithmetic instructions instead of a loop.
func UvarintLen32(x uint32) int {
	const (
		// mul / 2^shift approximates 1/7 (37/256 ≈ 0.1445).
		mul   = 37
		shift = 8
	)
	// Forcing the lowest bit on makes x=0 look like x=1, so zero needs no
	// special case: both encode to a single byte.
	bitLen := uint32(bits.Len32(x | 1))
	// Each encoded byte carries 7 payload bits, so the length is
	// ceil(bitLen/7), i.e. floor((bitLen+6)/7).
	rounded := bitLen + 6
	// rounded lies in [7, 38] here; over that range the multiply-and-shift
	// yields exactly rounded/7 while avoiding an integer division.
	return int((rounded * mul) >> shift)
}

// UvarintLen64 reports how many bytes the encoding/binary uvarint encoding of
// x occupies.
//
// The result always equals len(binary.AppendUvarint(nil, x)), but it is
// computed with a handful of arithmetic instructions instead of a loop.
func UvarintLen64(x uint64) int {
	const (
		// mul / 2^shift approximates 1/7 (37/256 ≈ 0.1445).
		mul   = 37
		shift = 8
	)
	// Forcing the lowest bit on makes x=0 look like x=1, so zero needs no
	// special case: both encode to a single byte.
	bitLen := uint32(bits.Len64(x | 1))
	// Each encoded byte carries 7 payload bits, so the length is
	// ceil(bitLen/7), i.e. floor((bitLen+6)/7).
	rounded := bitLen + 6
	// rounded lies in [7, 70] here; over that range the multiply-and-shift
	// yields exactly rounded/7 while avoiding an integer division.
	return int((rounded * mul) >> shift)
}
149 changes: 149 additions & 0 deletions crencoding/var_int_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
// Copyright 2024 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

package crencoding

import (
"fmt"
"io"
"math/rand/v2"
"testing"
)

// Sample benchmark results.
//
// Apple M1 (arm64/darwin):
// UvarintLen32/range=100/simple-10 0.64ns ± 1%
// UvarintLen32/range=100/crlib-10 0.86ns ± 2%
// UvarintLen32/range=1000/simple-10 1.11ns ± 1%
// UvarintLen32/range=1000/crlib-10 0.90ns ±12%
// UvarintLen32/range=100000/simple-10 1.89ns ± 1%
// UvarintLen32/range=100000/crlib-10 0.84ns ± 0%
// UvarintLen32/range=1000000000/simple-10 3.05ns ± 1%
// UvarintLen32/range=1000000000/crlib-10 0.84ns ± 0%
// UvarintLen32/range=4000000000/simple-10 2.83ns ± 2%
// UvarintLen32/range=4000000000/crlib-10 0.85ns ± 1%
//
// UvarintLen64/range=100/simple-10 0.63ns ± 1%
// UvarintLen64/range=100/crlib-10 0.84ns ± 0%
// UvarintLen64/range=1000/simple-10 1.10ns ± 1%
// UvarintLen64/range=1000/crlib-10 0.84ns ± 0%
// UvarintLen64/range=100000/simple-10 1.94ns ± 1%
// UvarintLen64/range=100000/crlib-10 0.84ns ± 0%
// UvarintLen64/range=1000000000/simple-10 2.83ns ± 3%
// UvarintLen64/range=1000000000/crlib-10 0.84ns ± 0%
// UvarintLen64/range=1000000000000/simple-10 3.02ns ± 0%
// UvarintLen64/range=1000000000000/crlib-10 0.84ns ± 0%
// UvarintLen64/range=1000000000000000/simple-10 4.88ns ± 1%
// UvarintLen64/range=1000000000000000/crlib-10 0.84ns ± 0%
//
//
// Intel(R) Xeon(R) CPU @ 2.80GHz (amd64, linux):
// UvarintLen32/range=100/simple-24 0.89ns ± 0%
// UvarintLen32/range=100/crlib-24 1.45ns ± 0%
// UvarintLen32/range=1000/simple-24 1.71ns ± 0%
// UvarintLen32/range=1000/crlib-24 1.45ns ± 0%
// UvarintLen32/range=100000/simple-24 2.84ns ± 0%
// UvarintLen32/range=100000/crlib-24 1.45ns ± 0%
// UvarintLen32/range=1000000000/simple-24 4.28ns ± 0%
// UvarintLen32/range=1000000000/crlib-24 1.45ns ± 0%
// UvarintLen32/range=4000000000/simple-24 3.92ns ± 0%
// UvarintLen32/range=4000000000/crlib-24 1.45ns ± 0%
//
// UvarintLen64/range=100/simple-24 0.89ns ± 0%
// UvarintLen64/range=100/crlib-24 1.23ns ± 0%
// UvarintLen64/range=1000/simple-24 1.58ns ± 1%
// UvarintLen64/range=1000/crlib-24 1.23ns ± 0%
// UvarintLen64/range=100000/simple-24 2.74ns ± 0%
// UvarintLen64/range=100000/crlib-24 1.23ns ± 0%
// UvarintLen64/range=1000000000/simple-24 4.26ns ± 0%
// UvarintLen64/range=1000000000/crlib-24 1.23ns ± 1%
// UvarintLen64/range=1000000000000/simple-24 4.27ns ± 0%
// UvarintLen64/range=1000000000000/crlib-24 1.23ns ± 0%
// UvarintLen64/range=1000000000000000/simple-24 7.17ns ± 0%
// UvarintLen64/range=1000000000000000/crlib-24 1.23ns ± 0%

// BenchmarkUvarintLen32 compares UvarintLen32 ("crlib") against the loop-based
// simpleVarUint32Len ("simple") on random inputs drawn from [0, limit) for
// several limits.
func BenchmarkUvarintLen32(b *testing.B) {
	limits := []uint32{100, 1000, 100_000, 1_000_000_000, 4_000_000_000}
	for _, limit := range limits {
		b.Run(fmt.Sprintf("range=%d", limit), func(b *testing.B) {
			// The sample count is a power of two so the loop counter can be
			// wrapped onto the slice with a mask.
			const (
				sampleCount = 1024
				indexMask   = sampleCount - 1
			)
			samples := make([]uint32, sampleCount)
			for idx := range samples {
				samples[idx] = rand.Uint32N(limit)
			}

			b.Run("simple", func(b *testing.B) {
				var sink int
				for iter := 0; iter < b.N; iter++ {
					sink ^= simpleVarUint32Len(samples[iter&indexMask])
				}
				// Consume the accumulated value so the calls above have an
				// observable use.
				fmt.Fprint(io.Discard, sink)
			})

			b.Run("crlib", func(b *testing.B) {
				var sink int
				for iter := 0; iter < b.N; iter++ {
					sink ^= UvarintLen32(samples[iter&indexMask])
				}
				// Consume the accumulated value so the calls above have an
				// observable use.
				fmt.Fprint(io.Discard, sink)
			})
		})
	}
}

// BenchmarkUvarintLen64 compares UvarintLen64 ("crlib") against the loop-based
// simpleVarUint64Len ("simple") on random inputs drawn from [0, limit) for
// several limits.
func BenchmarkUvarintLen64(b *testing.B) {
	limits := []uint64{100, 1000, 100_000, 1_000_000_000, 1_000_000_000_000, 1_000_000_000_000_000}
	for _, limit := range limits {
		b.Run(fmt.Sprintf("range=%d", limit), func(b *testing.B) {
			// The sample count is a power of two so the loop counter can be
			// wrapped onto the slice with a mask.
			const (
				sampleCount = 1024
				indexMask   = sampleCount - 1
			)
			samples := make([]uint64, sampleCount)
			for idx := range samples {
				samples[idx] = rand.Uint64N(limit)
			}

			b.Run("simple", func(b *testing.B) {
				var sink int
				for iter := 0; iter < b.N; iter++ {
					sink ^= simpleVarUint64Len(samples[iter&indexMask])
				}
				// Consume the accumulated value so the calls above have an
				// observable use.
				fmt.Fprint(io.Discard, sink)
			})

			b.Run("crlib", func(b *testing.B) {
				var sink int
				for iter := 0; iter < b.N; iter++ {
					sink ^= UvarintLen64(samples[iter&indexMask])
				}
				// Consume the accumulated value so the calls above have an
				// observable use.
				fmt.Fprint(io.Discard, sink)
			})
		})
	}
}

// simpleVarUint32Len is the straightforward loop-based baseline: it returns
// the number of bytes in the uvarint encoding of n by counting 7-bit groups.
func simpleVarUint32Len(n uint32) int {
	r := 1
	// A byte holds 7 payload bits, so any value >= 0x80 (including 0x80
	// itself) needs at least one more byte.
	for n >= 0x80 {
		r++
		n >>= 7
	}
	return r
}

// simpleVarUint64Len is the straightforward loop-based baseline: it returns
// the number of bytes in the uvarint encoding of n by counting 7-bit groups.
func simpleVarUint64Len(n uint64) int {
	r := 1
	// A byte holds 7 payload bits, so any value >= 0x80 (including 0x80
	// itself) needs at least one more byte.
	for n >= 0x80 {
		r++
		n >>= 7
	}
	return r
}
46 changes: 46 additions & 0 deletions crencoding/var_int_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright 2024 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

package crencoding

import (
"encoding/binary"
"math"
"math/rand/v2"
"testing"
)

// TestUvarintLen checks UvarintLen32 and UvarintLen64 against the length of
// the encoding produced by binary.AppendUvarint, on boundary values around
// every power of two and on random values of varying magnitude.
func TestUvarintLen(t *testing.T) {
	check := func(n uint64) {
		// Report failures at the call site rather than inside this closure.
		t.Helper()
		if got, want := UvarintLen64(n), len(binary.AppendUvarint(nil, n)); got != want {
			t.Fatalf("UvarintLen64(%d) = %d, want %d", n, got, want)
		}
		// The 32-bit variant is exercised with the low 32 bits of n; report
		// that truncated value so the message names the real input.
		n32 := uint32(n)
		if got, want := UvarintLen32(n32), len(binary.AppendUvarint(nil, uint64(n32))); got != want {
			t.Fatalf("UvarintLen32(%d) = %d, want %d", n32, got, want)
		}
	}
	check(0)
	check(math.MaxUint64)
	// Values just below, at, and just above each power of two cover every
	// point where the encoded length can change.
	for i := uint64(0); i < 64; i++ {
		check(1<<i - 1)
		check(1 << i)
		check(1<<i + 1)
	}
	// Shifting by a random amount spreads the samples across all bit lengths
	// instead of concentrating them near 64 bits.
	for i := 0; i < 100000; i++ {
		check(rand.Uint64() >> rand.UintN(64))
	}
}

0 comments on commit 0564491

Please sign in to comment.