-
Notifications
You must be signed in to change notification settings - Fork 2.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[pkg/ottl] Add MurmurHash3 converter #34155
Changes from 12 commits
3b959b3
15d7046
fe7c1a8
f606b35
8ecf228
599858e
2aa64bf
26ee23c
c53348c
c6a5bef
a09240c
31e7198
63c08ea
ebbaa00
7e72396
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Use this changelog template to create an entry for release notes. | ||
|
||
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' | ||
change_type: enhancement | ||
|
||
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) | ||
component: pkg/ottl | ||
|
||
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). | ||
note: "Add `MurmurHash3` function to convert the `target` to a hexadecimal string of the murmurHash3 hash/digest" | ||
|
||
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. | ||
issues: [34077] | ||
|
||
# (Optional) One or more lines of additional information to render under the primary note. | ||
# These lines will be padded with 2 spaces and then inserted directly into the document. | ||
# Use pipe (|) for multiline entries. | ||
subtext: | ||
|
||
# If your change doesn't affect end users or the exported elements of any package, | ||
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. | ||
# Optional: The change log or logs in which this entry should be included. | ||
# e.g. '[user]' or '[user, api]' | ||
# Include 'user' if the change is relevant to end users. | ||
# Include 'api' if there is a change to a library API. | ||
# Default: '[user]' | ||
change_logs: [user] |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
// Copyright The OpenTelemetry Authors | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs" | ||
|
||
import ( | ||
"context" | ||
"encoding/binary" | ||
"encoding/hex" | ||
"fmt" | ||
|
||
"github.com/twmb/murmur3" | ||
|
||
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" | ||
) | ||
|
||
// Accepted values for the optional Version argument of MurmurHash3.
const (
	// v32 selects the 32-bit MurmurHash3 variant.
	v32 = "32"
	// v128 selects the 128-bit MurmurHash3 variant; it is the default when no version is given.
	v128 = "128"
)
|
||
type MurmurHash3Arguments[K any] struct { | ||
Target ottl.StringGetter[K] | ||
Version ottl.Optional[string] // 32-bit or 128-bit | ||
} | ||
|
||
func NewMurmurHash3Factory[K any]() ottl.Factory[K] { | ||
return ottl.NewFactory("MurmurHash3", &MurmurHash3Arguments[K]{}, createMurmurHash3Function[K]) | ||
} | ||
|
||
func createMurmurHash3Function[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) { | ||
args, ok := oArgs.(*MurmurHash3Arguments[K]) | ||
|
||
if !ok { | ||
return nil, fmt.Errorf("MurmurHash3Factory args must be of type *MurmurHash3Arguments[K]") | ||
} | ||
|
||
version := v128 | ||
if !args.Version.IsEmpty() { | ||
v := args.Version.Get() | ||
|
||
switch v { | ||
case v32, v128: | ||
version = v | ||
default: | ||
return nil, fmt.Errorf("invalid arguments: %s. Version should be either \"32\" or \"128\"", v) | ||
} | ||
} | ||
|
||
return MurmurHash3HexString(args.Target, version) | ||
} | ||
|
||
// MurmurHash3HexString returns the hexadecimal representation of the hash in little-endian format. | ||
// MurmurHash3, developed by Austin Appleby, is sensitive to endianness. Unlike some other languages like Python, | ||
// which use little-endian for all architectures, the Go library `spaolacci/murmur3` has some open issues | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It might be helpful to directly reference the open issues being referenced. That way in the future we could remove this if they're ever fixed, and just to be able to quickly see the underlying issue. |
||
// related to endianness compatibility across languages. This function ensures consistency by using | ||
// little-endian and returns the hash value as a hexadecimal string. | ||
func MurmurHash3HexString[K any](target ottl.StringGetter[K], version string) (ottl.ExprFunc[K], error) { | ||
return func(ctx context.Context, tCtx K) (any, error) { | ||
val, err := target.Get(ctx, tCtx) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
switch version { | ||
case v32: | ||
h := murmur3.Sum32([]byte(val)) | ||
b := make([]byte, 4) | ||
binary.LittleEndian.PutUint32(b, h) | ||
return hex.EncodeToString(b), nil | ||
case v128: | ||
h1, h2 := murmur3.Sum128([]byte(val)) | ||
b := make([]byte, 16) | ||
binary.LittleEndian.PutUint64(b[:8], h1) | ||
binary.LittleEndian.PutUint64(b[8:], h2) | ||
return hex.EncodeToString(b), nil | ||
default: | ||
return nil, fmt.Errorf("invalid argument: %s", version) | ||
} | ||
}, nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we make these enum values? (To go along with my other nit about a switch statement)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using an enum means the syntax to call the 32-bit version will be
MurmurHash3("something", version=0)
. I think this is more confusing than MurmurHash3("something", version="32")
.