Skip to content

Commit

Permalink
chore: wip
Browse files Browse the repository at this point in the history
  • Loading branch information
ndabAP committed Jan 21, 2025
1 parent e107dc3 commit db6f891
Show file tree
Hide file tree
Showing 9 changed files with 183 additions and 12 deletions.
1 change: 1 addition & 0 deletions errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
package assocentity
12 changes: 6 additions & 6 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ require (
go.opentelemetry.io/otel v1.29.0 // indirect
go.opentelemetry.io/otel/metric v1.29.0 // indirect
go.opentelemetry.io/otel/trace v1.29.0 // indirect
golang.org/x/crypto v0.31.0 // indirect
golang.org/x/crypto v0.32.0 // indirect
golang.org/x/sync v0.10.0 // indirect
golang.org/x/time v0.8.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20241118233622-e639e219e697 // indirect
Expand All @@ -39,14 +39,14 @@ require (
github.com/googleapis/gax-go v1.0.3 // indirect
github.com/googleapis/gax-go/v2 v2.14.0
go.opencensus.io v0.24.0 // indirect
golang.org/x/exp v0.0.0-20221026153819-32f3d567a233 // indirect
golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8 // indirect
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3 // indirect
golang.org/x/mod v0.17.0 // indirect
golang.org/x/net v0.33.0 // indirect
golang.org/x/mod v0.22.0 // indirect
golang.org/x/net v0.34.0 // indirect
golang.org/x/oauth2 v0.24.0 // indirect
golang.org/x/sys v0.28.0 // indirect
golang.org/x/sys v0.29.0 // indirect
golang.org/x/text v0.21.0 // indirect
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
golang.org/x/tools v0.29.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/grpc v1.67.3 // indirect
google.golang.org/protobuf v1.35.2 // indirect
Expand Down
10 changes: 10 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,14 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc=
golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190221220918-438050ddec5e/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20221026153819-32f3d567a233 h1:9bNbSKT4RPLEzne0Xh1v3NaNecsa1DKjkOuTbY6V9rI=
golang.org/x/exp v0.0.0-20221026153819-32f3d567a233/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc=
golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8 h1:yqrTHse8TCMW1M1ZCP+VAR/l0kKxwaAIqN/il7x4voA=
golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU=
golang.org/x/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
Expand All @@ -120,6 +124,7 @@ golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91
golang.org/x/mod v0.6.0 h1:b9gGHsz9/HhJ3HF5DHQytPpuwocVTChQJK3AvoLRD5I=
golang.org/x/mod v0.6.0/go.mod h1:4mET923SAdbXp2ki8ey+zGs1SLqsuM2Y0uvdZR/fUNI=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
Expand All @@ -134,6 +139,8 @@ golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0=
golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783 h1:nt+Q6cXKz4MosCSpnbMtqiQ8Oz0pxTef2B4Vca2lvfk=
golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783/go.mod h1:h4gKUeWbJ4rQPri7E0u6Gs4e9Ri2zaLxzw5DI5XGrYg=
Expand All @@ -157,6 +164,8 @@ golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
Expand All @@ -182,6 +191,7 @@ golang.org/x/tools v0.2.0 h1:G6AHpWxTMGY1KyEYoAQ5WTtIekUUvDNjan3ugu60JvE=
golang.org/x/tools v0.2.0/go.mod h1:y4OqIKeOV/fWJetJ8bXPU1sEVniLMIyDAZWeHdV+NTA=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
golang.org/x/tools v0.29.0/go.mod h1:KMQVMRsVxU6nHCFXrBPhDB8XncLNLM0lIy/F14RP588=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/api v0.102.0 h1:JxJl2qQ85fRMPNvlZY/enexbxpCjLwGhZUtgfGeQ51I=
Expand Down
8 changes: 3 additions & 5 deletions source.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,18 @@ func (s source[T]) Tokenize(
var tokens Tokens[T]

// Entities
tokens.entities = make([][]tokenize.Token[T], 0)
tokens.entities = make([][]tokenize.Token[T], len(s.Entities))
for i, entity := range s.Entities {
tok, err := tokenizer.Tokenize(ctx, entity)
if err != nil {
return tokens, err
}

for _, t := range tok {
tokens.entities[i] = append(tokens.entities[i], t)
}
tokens.entities[i] = append(tokens.entities[i], tok...)
}

// Texts
tokens.texts = make([][]tokenize.Token[T], 0)
tokens.texts = make([][]tokenize.Token[T], len(s.Texts))
for i, text := range s.Texts {
select {
case <-ctx.Done():
Expand Down
133 changes: 133 additions & 0 deletions source_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
package assocentity

import (
"context"
"reflect"
"testing"

"github.com/ndabAP/assocentity/v15/tokenize"
"github.com/ndabAP/assocentity/v15/tokenize/delimiter"
)

// TestNewSource verifies that NewSource produces the expected source value
// for a table of entity/text combinations. The "duplicate entities" case
// expects the repeated "Max" entity to appear only once in the result.
func TestNewSource(t *testing.T) {
	cases := []struct {
		name     string
		entities []any
		texts    []any
		want     source[any]
	}{
		{
			name:     "empty",
			entities: []any{},
			texts:    []any{},
			want: source[any]{
				Entities: []any{},
				Texts:    []any{},
			},
		},
		{
			name:     "only text",
			entities: []any{},
			texts:    []any{"No Payne, No Gain.", "You can't win this one, Max."},
			want: source[any]{
				Entities: []any{},
				Texts:    []any{"No Payne, No Gain.", "You can't win this one, Max."},
			},
		},
		{
			name:     "only entities",
			entities: []any{"Max Payne", "Max", "Payne"},
			texts:    []any{},
			want: source[any]{
				Entities: []any{"Max Payne", "Max", "Payne"},
				Texts:    []any{},
			},
		},
		{
			name:     "duplicate entities",
			entities: []any{"Max Payne", "Max", "Max", "Payne"},
			texts:    []any{},
			want: source[any]{
				Entities: []any{"Max Payne", "Max", "Payne"},
				Texts:    []any{},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := NewSource(tc.entities, tc.texts)
			if reflect.DeepEqual(got, tc.want) {
				return
			}
			t.Errorf("NewSource() = %v, want %v", got, tc.want)
		})
	}
}

// TestSource_Tokenize runs source.Tokenize with delimiter-based tokenizers
// and compares the resulting Tokens against the expected entity and text
// token lists.
func TestSource_Tokenize(t *testing.T) {
	// Delimiter predicates used by the table below.
	isSpace := func(r rune) bool {
		return r == ' '
	}
	isSpaceOrPunct := func(r rune) bool {
		return r == ' ' || r == ',' || r == '.'
	}

	cases := []struct {
		name      string
		entities  []string
		texts     []string
		tokenizer tokenize.Tokenizer[string]
		mut       []WithMut[string]
		want      Tokens[string]
		wantErr   bool
	}{
		{
			name:      "empty",
			entities:  []string{},
			texts:     []string{},
			tokenizer: delimiter.New(nil),
			want: Tokens[string]{
				entities: [][]tokenize.Token[string]{},
				texts:    [][]tokenize.Token[string]{},
			},
		},
		{
			name:      "only entities",
			entities:  []string{"Max Payne", "Max", "Payne"},
			texts:     []string{},
			tokenizer: delimiter.New(isSpace),
			want: Tokens[string]{
				entities: [][]tokenize.Token[string]{
					{{Text: "Max"}, {Text: "Payne"}},
					{{Text: "Max"}},
					{{Text: "Payne"}},
				},
				texts: [][]tokenize.Token[string]{},
			},
		},
		{
			name:      "only text",
			entities:  []string{},
			texts:     []string{"No Payne, No Gain."},
			tokenizer: delimiter.New(isSpaceOrPunct),
			want: Tokens[string]{
				entities: [][]tokenize.Token[string]{},
				texts: [][]tokenize.Token[string]{
					{{Text: "No"}, {Text: "Payne"}, {Text: "No"}, {Text: "Gain"}},
				},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			src := NewSource(tc.entities, tc.texts)
			got, err := src.Tokenize(context.Background(), tc.tokenizer, tc.mut...)

			// Stop early when the error outcome differs from the expectation;
			// the returned tokens are not compared in that case.
			gotErr := err != nil
			if gotErr != tc.wantErr {
				t.Errorf("Tokenize() error = %v, wantErr %v", err, tc.wantErr)
				return
			}

			if !reflect.DeepEqual(got, tc.want) {
				t.Errorf("Tokenize() got = %v, want %v", got, tc.want)
			}
		})
	}
}
29 changes: 29 additions & 0 deletions tokenize/delimiter/tokenizer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package delimiter

import (
"context"
"strings"

"github.com/ndabAP/assocentity/v15/tokenize"
)

// delim is a tokenizer for plain strings that splits text at delimiter runes.
type delim struct {
	// delim reports whether a rune separates two tokens; Tokenize hands it to
	// strings.FieldsFunc.
	delim func(rune) bool
}

// New returns a delimiter tokenizer that uses f to decide which runes
// separate tokens.
func New(f func(rune) bool) delim {
	return delim{delim: f}
}

// Tokenize splits text into tokens at every run of runes for which the
// tokenizer's delimiter predicate reports true. Each resulting field becomes
// one token whose Text is the field; delimiter runes themselves are dropped.
//
// If the tokenizer was built without a predicate (nil), text is split around
// white space instead, so that a nil predicate cannot cause a panic.
//
// ctx is accepted to satisfy the tokenizer signature and is not consulted.
// The returned slice is never nil and the error is always nil.
func (d delim) Tokenize(ctx context.Context, text string) ([]tokenize.Token[string], error) {
	var fields []string
	if d.delim == nil {
		// strings.FieldsFunc would invoke the nil predicate and panic on any
		// non-empty text, so fall back to white space splitting.
		fields = strings.Fields(text)
	} else {
		fields = strings.FieldsFunc(text, d.delim)
	}

	// Pre-size to the known number of fields; the zero-length make keeps the
	// result non-nil even when there are no fields.
	tokens := make([]tokenize.Token[string], 0, len(fields))
	for _, field := range fields {
		tokens = append(tokens, tokenize.Token[string]{Text: field})
	}

	return tokens, nil
}
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion tokens_distance.go → tokens_vec.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"github.com/ndabAP/assocentity/v15/tokenize"
)

func (tokens Tokens[T]) Vec() {
func (tokens Tokens[T]) Vecs() {
cmp := func(
text []tokenize.Token[T],
iterator func(entities []tokenize.Token[T]) iter.Seq2[int, tokenize.Token[T]],
Expand Down

0 comments on commit db6f891

Please sign in to comment.