diff --git a/LICENSE b/LICENSE index d69b668..281de00 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2020 Julian Claus +Copyright (c) 2025 Julian Claus Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile index e82ffef..8c582b4 100644 --- a/Makefile +++ b/Makefile @@ -15,17 +15,14 @@ build: windows linux darwin @echo version: $(VERSION) windows: $(WINDOWS) - -linux: $(LINUX) - -darwin: $(DARWIN) - $(WINDOWS): env GOOS=windows GOARCH=amd64 go build -v -o bin/$(WINDOWS) -ldflags="-s -w -X main.version=$(VERSION)" ./cli/main.go +linux: $(LINUX) $(LINUX): env GOOS=linux GOARCH=amd64 go build -v -o bin/$(LINUX) -ldflags="-s -w -X main.version=$(VERSION)" ./cli/main.go +darwin: $(DARWIN) $(DARWIN): env GOOS=darwin GOARCH=amd64 go build -v -o bin/$(DARWIN) -ldflags="-s -w -X main.version=$(VERSION)" ./cli/main.go diff --git a/README.md b/README.md index 4c84719..c37f11f 100644 --- a/README.md +++ b/README.md @@ -1,166 +1,3 @@ -# assocentity - -[![Go Report Card](https://goreportcard.com/badge/github.com/ndabAP/assocentity/v13)](https://goreportcard.com/report/github.com/ndabAP/assocentity/v13) - -Package assocentity is a social science tool to analyze the relative distance -from tokens to entities. The motivation is to draw conclusions based on the -distance from interesting tokens to a certain entity and its synonyms. - -## Features - -- Provide your own tokenizer -- Provides a default NLP tokenizer (by Google) -- Define aliases for entities -- Provides a multi-OS, language-agnostic CLI version - -## Installation - -```bash -$ go get github.com/ndabAP/assocentity/v13 -``` - -## Prerequisites - -If you want to analyze human-readable texts you can use the provided Natural -Language tokenizer (powered by Google). To do so, sign up for a Cloud Natural -Language API service account key and download the generated JSON file. This -corresponds to the `credentialsFile` in the example below. You should never -commit that file. - -A possible offline tokenizer would be a whitespace tokenizer. You might also -use a parser, depending on your purposes. - -## Example - -We would like to find out how close, on average, adjectives are to a certain -public person. Let's take George W. Bush and 1,000 NBC news articles as an -example. "George Bush" is the entity, and its synonyms are "George Walker Bush", -"Bush" and so on. The texts are the 1,000 NBC news articles. - -Defining a text source and setting the entity is the first step. Next, we need -to instantiate our tokenizer. In this case, we use the provided Google NLP -tokenizer. Then we calculate the distances with `assocentity.Distances`, which -accepts multiple texts. Notice how we pass `tokenize.ADJ` to only include -adjectives as part of speech. Finally, we can take the mean by passing the -result to `assocentity.Mean`.
- -```go -// Define the text source and entities -texts := []string{ - "Former Presidents Barack Obama, Bill Clinton and ...", // Truncated - "At the Pentagon on the afternoon of 9/11, ...", - "Tony Blair moved swiftly to place his relationship with ...", -} -entities := []string{ - "George Walker Bush", - "George Bush", - "Bush", -} -source := assocentity.NewSource(entities, texts) - -// Instantiate the NLP tokenizer (powered by Google) -nlpTok := nlp.NewNLPTokenizer(credentialsFile, nlp.AutoLang) - -// Get the distances to adjectives -ctx := context.TODO() -dists, err := assocentity.Distances(ctx, nlpTok, tokenize.ADJ, source) -if err != nil { - // Handle error -} -// Get the mean from the distances -mean := assocentity.Mean(dists) -``` - -### Tokenization - -If you provide your own tokenizer, you must implement the interface with the -method `Tokenize` and the following signature: - -```go -type Tokenizer interface { - Tokenize(ctx context.Context, text string) ([]Token, error) -} -``` - -`Token` is of type: - -```go -type Token struct { - PoS PoS // Part of speech - Text string // Text -} - -// Part of speech -type PoS int -``` - -For example, given the text: - -```go -text := "Punchinello was burning to get me" -``` - -The result from `Tokenize` would be: - -```go -[]Token{ - { - Text: "Punchinello", - PoS: tokenize.NOUN, - }, - { - Text: "was", - PoS: tokenize.VERB, - }, - { - Text: "burning", - PoS: tokenize.VERB, - }, - { - Text: "to", - PoS: tokenize.PRT, - }, - { - Text: "get", - PoS: tokenize.VERB, - }, - { - Text: "me", - PoS: tokenize.PRON, - }, -} -``` - -## CLI - -There is also a language-agnostic terminal version available for Windows, -Mac (Darwin) and Linux (64-bit only) if you don't have Go available. -The application expects the text on "stdin" and accepts the following flags: - -| Flag | Description | Type | Default | -| ------------- | ------------------------------------------------------------------------------------------------- | -------- | ------- | -| `entities` | Define entities to be searched within input, example: `-entities="Max Payne,Payne"` | `string` | | -| `gog-svc-loc` | Google Cloud's NLP JSON service account file, example: `-gog-svc-loc="/home/max/gog-svc-loc.json"` | `string` | | -| `op` | Operation to execute: `-op="mean"` | `string` | `mean` | -| `pos` | Defines parts of speech to keep, example: `-pos=noun,verb,pron` | `string` | `any` | - -Example: - -```bash -echo "Relax, Max. You're a nice guy." | ./bin/assocentity_linux_amd64_v13.0.0-0-g948274a-dirty -gog-svc-loc=/home/max/.config/assocentity/google-service.json -entities="Max Payne,Payne,Max" -``` - -The output is written to "stdout" as CSV. - -## Projects using assocentity - -- [entityscrape](https://github.com/ndabAP/entityscrape) - Distance between word - types (default: adjectives) in news articles and persons - -## Author - -[Julian Claus](https://www.julian-claus.de) and contributors.
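The Tokenization section above shows only the interface. As a concrete companion, here is a minimal sketch of a custom tokenizer: a whitespace splitter, mirroring the `whiteSpaceTokenizer` used in `assocentity_test.go` further down in this diff. The import paths follow the v13 README above (the diff below bumps the module to v14), and tagging every token as `tokenize.UNKN` is the simplification such a tokenizer has to make, since it cannot infer grammar:

```go
package main

import (
	"context"
	"fmt"
	"strings"

	"github.com/ndabAP/assocentity/v13"
	"github.com/ndabAP/assocentity/v13/tokenize"
)

// whitespaceTokenizer satisfies the Tokenizer interface by splitting on
// whitespace. It cannot infer parts of speech, so every token is tagged
// tokenize.UNKN, which limits it to tokenize.ANY queries.
type whitespaceTokenizer struct{}

func (whitespaceTokenizer) Tokenize(ctx context.Context, text string) ([]tokenize.Token, error) {
	tokens := make([]tokenize.Token, 0)
	for _, s := range strings.Fields(text) {
		tokens = append(tokens, tokenize.Token{PoS: tokenize.UNKN, Text: s})
	}
	return tokens, nil
}

func main() {
	source := assocentity.NewSource(
		[]string{"Max Payne", "Max"},          // entities
		[]string{"I was with Max Payne here"}, // texts
	)
	dists, err := assocentity.Distances(
		context.Background(), whitespaceTokenizer{}, tokenize.ANY, source,
	)
	if err != nil {
		// Handle error
	}
	fmt.Println(assocentity.Mean(dists))
}
```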
- -## License - -MIT +- Source.Tokenize() and Source.Sentiment() (different NLP API) +- Which leader uses more swear words +- Count(...args) \ No newline at end of file diff --git a/assocentity.go b/assocentity.go deleted file mode 100644 index b6a3469..0000000 --- a/assocentity.go +++ /dev/null @@ -1,185 +0,0 @@ -package assocentity - -import ( - "context" - "math" - - "github.com/ndabAP/assocentity/v13/internal/comp" - "github.com/ndabAP/assocentity/v13/internal/iterator" - "github.com/ndabAP/assocentity/v13/internal/pos" - "github.com/ndabAP/assocentity/v13/tokenize" -) - -// source wraps entities and texts -type source struct { - Entities []string - Texts []string -} - -// NewSource returns a new source consisting of entities and texts -func NewSource(entities, texts []string) source { - return source{ - Entities: entities, - Texts: texts, - } -} - -// Distances returns the distances from entities to a list of texts -func Distances( - ctx context.Context, - tokenizer tokenize.Tokenizer, - poS tokenize.PoS, - source source, -) (map[tokenize.Token][]float64, error) { - var ( - dists = make(map[tokenize.Token][]float64) - err error - ) - for _, text := range source.Texts { - d, err := distances(ctx, tokenizer, poS, text, source.Entities) - if err != nil { - return dists, err - } - - for tok, dist := range d { - dists[tok] = append(dists[tok], dist...) - } - } - - return dists, err -} - -// distances returns the distances to entities for one text -func distances( - ctx context.Context, - tokenizer tokenize.Tokenizer, - poS tokenize.PoS, - text string, - entities []string, -) (map[tokenize.Token][]float64, error) { - var ( - dists = make(map[tokenize.Token][]float64) - err error - ) - - // Tokenize text - textTokens, err := tokenizer.Tokenize(ctx, text) - if err != nil { - return dists, err - } - - // Tokenize entities - var entityTokens [][]tokenize.Token - for _, entity := range entities { - tokens, err := tokenizer.Tokenize(ctx, entity) - if err != nil { - return dists, err - } - entityTokens = append(entityTokens, tokens) - } - - // Determinate part of speech - posDetermer := pos.NewPoSDetermer(poS) - determTokens := posDetermer.DetermPoS(textTokens, entityTokens) - - // Check if any given PoS was found in text tokens - if len(determTokens) == 0 { - return dists, nil - } - - // Create iterators - - determTokensIter := iterator.New(determTokens) - - // Use iterators to search for entities in positive and negative direction - posDirIter := iterator.New(determTokens) - negDirIter := iterator.New(determTokens) - - entityTokensIter := iterator.New(entityTokens) - - // Iterate through part of speech determinated text tokens - for determTokensIter.Next() { - // If the current text token is an entity, we skip about the entity - currDetermTokensPos := determTokensIter.CurrPos() - isEntity, entity := comp.TextWithEntities( - determTokensIter, - entityTokensIter, - comp.DirPos, - ) - if isEntity { - determTokensIter.Forward(len(entity) - 1) - continue - } - - // Now we can collect the actual distances - - // Finds/counts entities in positive direction - posDirIter.SetPos(currDetermTokensPos) - for posDirIter.Next() { - // [I, was, (with), Max, Payne, here] -> true, Max Payne - // [I, was, with, Max, Payne, (here)] -> false, "" - isEntity, entity := comp.TextWithEntities( - posDirIter, - entityTokensIter, - comp.DirPos, - ) - if isEntity { - appendDist(dists, determTokensIter, posDirIter) - // Skip about entity - posDirIter.Forward(len(entity) - 1) // Next increments - } - } - - // Finds/counts 
entities in negative direction - negDirIter.SetPos(currDetermTokensPos) - for negDirIter.Prev() { - // [I, was, (with), Max, Payne, here] -> false, "" - // [I, was, with, Max, Payne, (here)] -> true, Max Payne - isEntity, entity := comp.TextWithEntities( - negDirIter, - entityTokensIter, - comp.DirNeg, - ) - if isEntity { - appendDist(dists, determTokensIter, negDirIter) - negDirIter.Rewind(len(entity) - 1) - } - } - } - - return dists, err -} - -// Helper to append a float64 to a map of tokens and distances -func appendDist( - m map[tokenize.Token][]float64, - k *iterator.Iterator[tokenize.Token], - v *iterator.Iterator[tokenize.Token], -) { - token := k.CurrElem() - dist := math.Abs(float64(v.CurrPos() - k.CurrPos())) - m[token] = append(m[token], dist) -} - -// Mean returns the mean of the provided distances -func Mean(dists map[tokenize.Token][]float64) map[tokenize.Token]float64 { - mean := make(map[tokenize.Token]float64) - for token, d := range dists { - mean[token] = meanFloat64(d) - } - return mean -} - -// Returns the mean of a 64-bit float slice -func meanFloat64(xs []float64) float64 { - // Prevent /0 - if len(xs) == 0 { - return 0 - } - - sum := 0.0 - for _, x := range xs { - sum += x - } - return sum / float64(len(xs)) -} diff --git a/assocentity_test.go b/assocentity_test.go deleted file mode 100644 index 775c509..0000000 --- a/assocentity_test.go +++ /dev/null @@ -1,340 +0,0 @@ -package assocentity - -import ( - "context" - "reflect" - "strings" - "testing" - - "github.com/ndabAP/assocentity/v13/tokenize" -) - -// whiteSpaceTokenizer tokenizes a text by empty space and assigns unknown -// pos -type whiteSpaceTokenizer int - -func (t whiteSpaceTokenizer) Tokenize(ctx context.Context, text string) ([]tokenize.Token, error) { - spl := strings.Split(text, " ") - tokens := make([]tokenize.Token, 0) - for _, s := range spl { - tokens = append(tokens, tokenize.Token{ - PoS: tokenize.UNKN, - Text: s, - }) - } - - return tokens, nil -} - -func TestMean(t *testing.T) { - type args struct { - ctx context.Context - tokenizer tokenize.Tokenizer - poS tokenize.PoS - texts []string - entities []string - } - tests := []struct { - args args - want map[tokenize.Token]float64 - wantErr bool - }{ - { - args: args{ - ctx: context.Background(), - tokenizer: new(whiteSpaceTokenizer), - poS: tokenize.ANY, - texts: []string{ - "AA B $ CCC ++", - "$ E ++ AA $ B", - }, - entities: []string{"$", "++"}, - }, - want: map[tokenize.Token]float64{ - { - PoS: tokenize.UNKN, - Text: "AA", - }: 2.2, - { - PoS: tokenize.UNKN, - Text: "B", - }: 2.6, - { - PoS: tokenize.UNKN, - Text: "CCC", - }: 1, - { - PoS: tokenize.UNKN, - Text: "E", - }: 1.6666666666666667, - }, - }, - } - for _, tt := range tests { - t.Run("", func(t *testing.T) { - source := NewSource(tt.args.entities, tt.args.texts) - dists, err := Distances( - tt.args.ctx, - tt.args.tokenizer, - tt.args.poS, - source, - ) - if err != nil { - t.Error(err) - } - - got := Mean(dists) - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("Mean() = %v, want %v", got, tt.want) - } - }) - } -} - -// concreteTokenizer is a tokenizer with a fixed set of tokens -type concreteTokenizer int - -func (t concreteTokenizer) Tokenize(ctx context.Context, text string) ([]tokenize.Token, error) { - spl := strings.Split(text, " ") - tokens := make([]tokenize.Token, 0) - for _, s := range spl { - var poS tokenize.PoS - switch s { - case "English": - poS = tokenize.NOUN - - case ".": - poS = tokenize.PUNCT - - case "run": - poS = tokenize.VERB - - default: - continue - } - - 
tokens = append(tokens, tokenize.Token{ - PoS: poS, - Text: s, - }) - } - - return tokens, nil -} - -func Test_distances(t *testing.T) { - type args struct { - ctx context.Context - tokenizer tokenize.Tokenizer - poS tokenize.PoS - text string - entities []string - } - tests := []struct { - args args - want map[tokenize.Token][]float64 - wantErr bool - }{ - { - args: args{ - ctx: context.Background(), - tokenizer: new(concreteTokenizer), - poS: tokenize.NOUN | tokenize.PUNCT | tokenize.VERB, - text: "English x . x xx run", - entities: []string{"run"}, - }, - want: map[tokenize.Token][]float64{ - { - PoS: tokenize.NOUN, - Text: "English", - }: {2}, - { - PoS: tokenize.PUNCT, - Text: ".", - }: {1}, - }, - }, - } - for _, tt := range tests { - t.Run("", func(t *testing.T) { - got, err := distances( - tt.args.ctx, - tt.args.tokenizer, - tt.args.poS, - tt.args.text, - tt.args.entities, - ) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("dist() = %v, want %v", got, tt.want) - } - }) - } -} - -func TestNormalize(t *testing.T) { - t.Run("HumandReadableNormalizer", func(t *testing.T) { - got := map[tokenize.Token][]float64{ - { - PoS: tokenize.UNKN, - Text: "A", - }: {}, - { - PoS: tokenize.UNKN, - Text: "a", - }: {}, - { - PoS: tokenize.UNKN, - Text: "b", - }: {}, - { - PoS: tokenize.UNKN, - Text: "&", - }: {}, - } - want := map[tokenize.Token][]float64{ - { - PoS: tokenize.UNKN, - Text: "a", - }: {}, - { - PoS: tokenize.UNKN, - Text: "b", - }: {}, - { - PoS: tokenize.UNKN, - Text: "and", - }: {}, - } - Normalize(got, HumandReadableNormalizer) - - if !reflect.DeepEqual(got, want) { - t.Errorf("Normalize() = %v, want %v", got, want) - } - }) -} - -func TestThreshold(t *testing.T) { - type args struct { - dists map[tokenize.Token][]float64 - threshold float64 - } - tests := []struct { - args args - want map[tokenize.Token][]float64 - }{ - { - args: args{ - dists: map[tokenize.Token][]float64{ - { - PoS: tokenize.UNKN, - Text: "A", - }: {1}, - { - PoS: tokenize.UNKN, - Text: "B", - }: {1, 1}, - { - PoS: tokenize.UNKN, - Text: "C", - }: {1, 1, 1}, - { - PoS: tokenize.UNKN, - Text: "D", - }: {1, 1, 1}, - }, - threshold: 75, - }, - want: map[tokenize.Token][]float64{ - { - PoS: tokenize.UNKN, - Text: "C", - }: {1, 1, 1}, - { - PoS: tokenize.UNKN, - Text: "D", - }: {1, 1, 1}, - }, - }, - { - args: args{ - dists: map[tokenize.Token][]float64{ - { - PoS: tokenize.UNKN, - Text: "A", - }: {1}, - { - PoS: tokenize.UNKN, - Text: "B", - }: {1, 1}, - { - PoS: tokenize.UNKN, - Text: "C", - }: {1, 1, 1}, - { - PoS: tokenize.UNKN, - Text: "D", - }: {1, 1, 1, 1}, - }, - threshold: 76, - }, - want: map[tokenize.Token][]float64{ - { - PoS: tokenize.UNKN, - Text: "D", - }: {1, 1, 1, 1}, - }, - }, - { - args: args{ - dists: map[tokenize.Token][]float64{ - { - PoS: tokenize.UNKN, - Text: "A", - }: {1}, - { - PoS: tokenize.UNKN, - Text: "B", - }: {1, 1}, - { - PoS: tokenize.UNKN, - Text: "C", - }: {1, 1, 1}, - { - PoS: tokenize.UNKN, - Text: "D", - }: {1, 1, 1, 1}, - }, - threshold: 1, - }, - want: map[tokenize.Token][]float64{ - { - PoS: tokenize.UNKN, - Text: "A", - }: {1}, - { - PoS: tokenize.UNKN, - Text: "B", - }: {1, 1}, - { - PoS: tokenize.UNKN, - Text: "C", - }: {1, 1, 1}, - { - PoS: tokenize.UNKN, - Text: "D", - }: {1, 1, 1, 1}, - }, - }, - } - for _, tt := range tests { - t.Run("", func(t *testing.T) { - Threshold(tt.args.dists, tt.args.threshold) - if !reflect.DeepEqual(tt.args.dists, tt.want) { - t.Errorf("Threshold() = %v, want %v", tt.args.dists, tt.want) - } - }) - } 
-} diff --git a/bin/assocentity_darwin_amd64_v13.0.3-0-g9d9e27c-dirty b/bin/assocentity_darwin_amd64_v13.0.3-0-g9d9e27c-dirty deleted file mode 100755 index f6fc464..0000000 Binary files a/bin/assocentity_darwin_amd64_v13.0.3-0-g9d9e27c-dirty and /dev/null differ diff --git a/bin/assocentity_linux_amd64_v13.0.3-0-g9d9e27c-dirty b/bin/assocentity_linux_amd64_v13.0.3-0-g9d9e27c-dirty deleted file mode 100755 index 96663ec..0000000 Binary files a/bin/assocentity_linux_amd64_v13.0.3-0-g9d9e27c-dirty and /dev/null differ diff --git a/bin/assocentity_windows_amd64_v13.0.3-0-g9d9e27c-dirty.exe b/bin/assocentity_windows_amd64_v13.0.3-0-g9d9e27c-dirty.exe deleted file mode 100755 index 9738e51..0000000 Binary files a/bin/assocentity_windows_amd64_v13.0.3-0-g9d9e27c-dirty.exe and /dev/null differ diff --git a/cli/main.go b/cli/main.go deleted file mode 100644 index 6a5b4a5..0000000 --- a/cli/main.go +++ /dev/null @@ -1,151 +0,0 @@ -package main - -import ( - "context" - "encoding/csv" - "errors" - "flag" - "fmt" - "io" - "log" - "os" - "strings" - - "github.com/ndabAP/assocentity/v13" - "github.com/ndabAP/assocentity/v13/nlp" - "github.com/ndabAP/assocentity/v13/tokenize" -) - -var logger = log.Default() - -func init() { - log.SetFlags(0) - logger.SetOutput(os.Stderr) - flag.Parse() -} - -var ( - entitiesF = flag.String( - "entities", - "", - "Define entities to be searched within input, example: -entities=\"Max Payne,Payne\"", - ) - gogSvcLocF = flag.String( - "gog-svc-loc", - "", - "Google Clouds NLP JSON service account file, example: -gog-svc-loc=\"~/gog-svc-loc.json\"", - ) - opF = flag.String( - "op", - "mean", - "Operation to execute", - ) - posF = flag.String( - "pos", - "any", - "Defines part of speeches to be included, example: -pos=noun,verb,pron", - ) -) - -func main() { - if len(*gogSvcLocF) == 0 { - printHelpAndFail(errors.New("missing google service account file")) - } - - // Read text from stdin - textBytes, err := io.ReadAll(os.Stdin) - if err != nil { - printHelpAndFail(err) - } - if len(textBytes) == 0 { - printHelpAndFail(errors.New("empty text")) - } - - credentialsFilename := *gogSvcLocF - nlpTok := nlp.NewNLPTokenizer(credentialsFilename, nlp.AutoLang) - - // Set part of speech - posArr := strings.Split(*posF, ",") - if len(posArr) == 0 { - printHelpAndFail(errors.New("missing pos")) - } - // Parse part of speech flag and use PoS type - poS := parsePoS(posArr) - - // Prepare text and entities - text := string(textBytes) - entities := strings.Split(*entitiesF, ",") - if len(entities) == 0 { - printHelpAndFail(errors.New("missing entities")) - } - - // Recover to provide an unified API response - defer func() { - if r := recover(); r != nil { - printHelpAndFail(r) - } - }() - - // Should we set a timeout? 
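The comment above leaves the timeout question open. One hedged option, assuming a hard per-invocation deadline is acceptable, is to swap the `var ctx` assignment below for `context.WithTimeout`; this would also require importing `time`, and the 60-second figure is purely illustrative, not taken from the project:

```go
// Bound all NLP round trips with a deadline instead of a bare
// context.Background(), so a hanging API call cannot block the CLI
// forever. The 60s value is an assumption.
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
defer cancel()
```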
- var ctx = context.Background() - - switch *opF { - case "mean": - source := assocentity.NewSource(entities, []string{text}) - dists, err := assocentity.Distances( - ctx, - nlpTok, - poS, - source, - ) - if err != nil { - printHelpAndFail(err) - } - mean := assocentity.Mean(dists) - - // Write CSV to stdout - csvwr := csv.NewWriter(os.Stdout) - defer csvwr.Flush() - for tok, dist := range mean { - poS, ok := tokenize.PoSMapStr[tok.PoS] - if !ok { - printHelpAndFail(errors.New("unassigned part of speech")) - } - record := []string{ - // Text - tok.Text, - // Part of speech - poS, - // Distance - fmt.Sprintf("%f", dist), - } - if err := csvwr.Write(record); err != nil { - printHelpAndFail(err) - } - } - - default: - printHelpAndFail(errors.New("unknown operation")) - } -} - -// ["noun", "adj", "verb"] -> 11 -func parsePoS(posArr []string) (pos tokenize.PoS) { - for _, p := range posArr { - if p, ok := tokenize.PoSMap[p]; ok { - // Add bits - pos += p - } - } - return -} - -func printHelpAndFail(reason any) { - logger.Println(reason) - logger.Println() - logger.Println("Usage:") - logger.Println() - flag.PrintDefaults() - - os.Exit(1) -} diff --git a/go.mod b/go.mod index f1ec677..8ea7190 100644 --- a/go.mod +++ b/go.mod @@ -1,33 +1,54 @@ -module github.com/ndabAP/assocentity/v13 +module github.com/ndabAP/assocentity/v14 -go 1.18 +go 1.23.5 require ( - cloud.google.com/go v0.34.0 + cloud.google.com/go v0.116.0 github.com/joho/godotenv v1.3.0 - google.golang.org/api v0.102.0 - google.golang.org/genproto v0.0.0-20221024183307-1bc688fe9f3e + google.golang.org/api v0.214.0 + google.golang.org/genproto v0.0.0-20241118233622-e639e219e697 +) + +require ( + cloud.google.com/go/auth v0.13.0 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.6 // indirect + cloud.google.com/go/compute/metadata v0.6.0 // indirect + cloud.google.com/go/language v1.14.3 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/google/s2a-go v0.1.8 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect + go.opentelemetry.io/otel v1.29.0 // indirect + go.opentelemetry.io/otel/metric v1.29.0 // indirect + go.opentelemetry.io/otel/trace v1.29.0 // indirect + golang.org/x/crypto v0.31.0 // indirect + golang.org/x/sync v0.10.0 // indirect + golang.org/x/time v0.8.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20241118233622-e639e219e697 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 // indirect ) require ( github.com/BurntSushi/toml v0.3.1 // indirect - github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect - github.com/golang/protobuf v1.5.2 // indirect - github.com/google/go-cmp v0.5.9 // indirect - github.com/googleapis/enterprise-certificate-proxy v0.2.0 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect github.com/googleapis/gax-go v1.0.3 // indirect - github.com/googleapis/gax-go/v2 v2.7.0 - go.opencensus.io v0.23.0 // indirect + github.com/googleapis/gax-go/v2 v2.14.0 + go.opencensus.io v0.24.0 // indirect golang.org/x/exp v0.0.0-20221026153819-32f3d567a233 // indirect golang.org/x/lint 
v0.0.0-20190313153728-d0100b6bd8b3 // indirect - golang.org/x/mod v0.6.0 // indirect - golang.org/x/net v0.7.0 // indirect - golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783 // indirect - golang.org/x/sys v0.5.0 // indirect - golang.org/x/text v0.7.0 // indirect - golang.org/x/tools v0.2.0 // indirect - google.golang.org/appengine v1.6.7 // indirect - google.golang.org/grpc v1.50.1 // indirect - google.golang.org/protobuf v1.28.1 // indirect + golang.org/x/mod v0.17.0 // indirect + golang.org/x/net v0.33.0 // indirect + golang.org/x/oauth2 v0.24.0 // indirect + golang.org/x/sys v0.28.0 // indirect + golang.org/x/text v0.21.0 // indirect + golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect + google.golang.org/appengine v1.6.8 // indirect + google.golang.org/grpc v1.67.3 // indirect + google.golang.org/protobuf v1.35.2 // indirect honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc // indirect ) diff --git a/go.sum b/go.sum index 20722e7..5612d82 100644 --- a/go.sum +++ b/go.sum @@ -1,19 +1,40 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0 h1:eOI3/cP2VTU6uZLDYAoic+eyzzB9YyGmJ7eIjl8rOPg= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.116.0 h1:B3fRrSDkLRt5qSHWe40ERJvhvnQwdZiHu0bJOpldweE= +cloud.google.com/go v0.116.0/go.mod h1:cEPSRWPzZEswwdr9BxE6ChEn01dWlTaF05LiC2Xs70U= +cloud.google.com/go/auth v0.13.0 h1:8Fu8TZy167JkW8Tj3q7dIkr2v4cndv41ouecJx0PAHs= +cloud.google.com/go/auth v0.13.0/go.mod h1:COOjD9gwfKNKz+IIduatIhYJQIc0mG3H102r/EMxX6Q= +cloud.google.com/go/auth/oauth2adapt v0.2.6 h1:V6a6XDu2lTwPZWOawrAa9HUK+DB2zfJyTuciBG5hFkU= +cloud.google.com/go/auth/oauth2adapt v0.2.6/go.mod h1:AlmsELtlEBnaNTL7jCj8VQFLy6mbZv0s4Q7NGBeQ5E8= +cloud.google.com/go/compute v1.29.0 h1:Lph6d8oPi38NHkOr6S55Nus/Pbbcp37m/J0ohgKAefs= +cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I= +cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg= +cloud.google.com/go/language v1.14.3 h1:8hmFMiS3wjjj3TX/U1zZYTgzwZoUjDbo9PaqcYEmuB4= +cloud.google.com/go/language v1.14.3/go.mod h1:hjamj+KH//QzF561ZuU2J+82DdMlFUjmiGVWpovGGSA= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= 
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:tluoj9z5200jBnyusfRPU2LqT6J+DAorxEvtC7LHB+E= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -29,6 +50,8 @@ github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -38,25 +61,52 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= +github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/enterprise-certificate-proxy v0.2.0 h1:y8Yozv7SZtlU//QXbezB6QkpuE6jMD2/gfzk4AftXjs= github.com/googleapis/enterprise-certificate-proxy v0.2.0/go.mod h1:8C0jb7/mgJe/9KK8Lm7X9ctZC2t60YyIpYEI16jx0Qg= +github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw= +github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA= github.com/googleapis/gax-go v1.0.3 h1:9dMLqhaibYONnDRcnHdUs9P8Mw64jLlZTYlDe3leBtQ= github.com/googleapis/gax-go v1.0.3/go.mod h1:QyXYajJFdARxGzjwUfbDFIse7Spkw81SJ4LrBJXtlQ8= github.com/googleapis/gax-go/v2 v2.0.2/go.mod h1:LLvjysVCY1JZeum8Z6l8qUty8fiNwE08qbEPm1M08qg= github.com/googleapis/gax-go/v2 v2.7.0 
h1:IcsPKeInNvYi7eqSaDjiZqDDKu5rsmunY0Y1YupQSSQ= github.com/googleapis/gax-go/v2 v2.7.0/go.mod h1:TEop28CZZQ2y+c0VxMUmu1lV+fQx57QpBWsYpwqHJx8= +github.com/googleapis/gax-go/v2 v2.14.0 h1:f+jMrjBPl+DL9nI4IQzLUxMq7XrAqFYB7hBPqMNIe8o= +github.com/googleapis/gax-go/v2 v2.14.0/go.mod h1:lhBCnjdLrWRaPvLWhmc8IS24m9mr07qSYnHncrgo+zk= github.com/joho/godotenv v1.3.0 h1:Zjp+RcGpHhGlrMbJzXTrZZPrWj+1vfm90La1wgB6Bhc= github.com/joho/godotenv v1.3.0/go.mod h1:7hK45KPybAkOC6peb+G5yklZfMxEjkZhHbwpqxOKXbg= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= +go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 h1:r6I7RJCN86bpD/FQwedZ0vSixDpwuWREjW9oRMsmqDc= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0/go.mod h1:B9yO6b04uB80CzjedvewuqDhxJxi11s7/GtiGa8bAjI= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8= +go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw= +go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8= +go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc= +go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8= +go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4= +go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190221220918-438050ddec5e/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp 
v0.0.0-20221026153819-32f3d567a233 h1:9bNbSKT4RPLEzne0Xh1v3NaNecsa1DKjkOuTbY6V9rI= @@ -66,54 +116,94 @@ golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTk golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3 h1:XQyxROzUlZH+WIQwySDgnISgOivlhjIEwaQaJEJrrN0= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.6.0 h1:b9gGHsz9/HhJ3HF5DHQytPpuwocVTChQJK3AvoLRD5I= golang.org/x/mod v0.6.0/go.mod h1:4mET923SAdbXp2ki8ey+zGs1SLqsuM2Y0uvdZR/fUNI= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783 h1:nt+Q6cXKz4MosCSpnbMtqiQ8Oz0pxTef2B4Vca2lvfk= golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783/go.mod h1:h4gKUeWbJ4rQPri7E0u6Gs4e9Ri2zaLxzw5DI5XGrYg= +golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= +golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys 
v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg= +golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.2.0 h1:G6AHpWxTMGY1KyEYoAQ5WTtIekUUvDNjan3ugu60JvE= golang.org/x/tools v0.2.0/go.mod h1:y4OqIKeOV/fWJetJ8bXPU1sEVniLMIyDAZWeHdV+NTA= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/api v0.102.0 h1:JxJl2qQ85fRMPNvlZY/enexbxpCjLwGhZUtgfGeQ51I= 
google.golang.org/api v0.102.0/go.mod h1:3VFl6/fzoA+qNuS1N1/VfXY4LjoXN/wzeIp7TweWwGo= +google.golang.org/api v0.214.0 h1:h2Gkq07OYi6kusGOaT/9rnNljuXmqPnaig7WGPmKbwA= +google.golang.org/api v0.214.0/go.mod h1:bYPpLG8AyeMWwDU6NXoB00xC0DFkikVvd5MfwoxjLqE= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20221024183307-1bc688fe9f3e h1:S9GbmC1iCgvbLyAokVCwiO6tVIrU9Y7c5oMx1V/ki/Y= google.golang.org/genproto v0.0.0-20221024183307-1bc688fe9f3e/go.mod h1:9qHF0xnpdSfF6knlcsnpzUu5y+rpwgbvsyGAZPBMg4s= +google.golang.org/genproto v0.0.0-20241118233622-e639e219e697 h1:ToEetK57OidYuqD4Q5w+vfEnPvPpuTwedCNVohYJfNk= +google.golang.org/genproto v0.0.0-20241118233622-e639e219e697/go.mod h1:JJrvXBWRZaFMxBufik1a4RpFw4HhgVtBBWQeQgUj2cc= +google.golang.org/genproto/googleapis/api v0.0.0-20241118233622-e639e219e697 h1:pgr/4QbFyktUv9CtQ/Fq4gzEE6/Xs7iCXbktaGzLHbQ= +google.golang.org/genproto/googleapis/api v0.0.0-20241118233622-e639e219e697/go.mod h1:+D9ySVjN8nY8YCVjc5O7PZDIdZporIDY3KaGfJunh88= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 h1:8ZmaLZE4XWrtU3MyClkYqqtl6Oegr3235h7jxsDyqCY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576/go.mod h1:5uTbfoYQed2U9p3KIj2/Zzm02PYhndfdmML0qC3q3FU= google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= @@ -122,6 +212,8 @@ google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8 google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= google.golang.org/grpc v1.50.1 h1:DS/BukOZWp8s6p4Dt/tOaJaTQyPyOoCcrjroHuCeLzY= google.golang.org/grpc v1.50.1/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI= +google.golang.org/grpc v1.67.3 h1:OgPcDAFKHnH8X3O4WcO4XUc8GRDeKsKReqbQtiCj7N8= +google.golang.org/grpc v1.67.3/go.mod h1:YGaHCc6Oap+FzBJTZLBzkGSYt/cvGPFTPxkn7QfSU8s= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -135,8 +227,11 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.35.2 
h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io= +google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc h1:/hemPrYIhOhy8zYrNj+069zDB68us2sMGsfkFJO0iZs= diff --git a/internal/comp/comp.go b/internal/comp/comp.go deleted file mode 100644 index 53eda72..0000000 --- a/internal/comp/comp.go +++ /dev/null @@ -1,72 +0,0 @@ -package comp - -import ( - "github.com/ndabAP/assocentity/v13/internal/iterator" - "github.com/ndabAP/assocentity/v13/tokenize" -) - -type Direction int - -var ( - DirPos Direction = 1 - DirNeg Direction = -1 -) - -// Checks if current text token is entity and if, returns entity -func TextWithEntities(textIter *iterator.Iterator[tokenize.Token], entityTokensIter *iterator.Iterator[[]tokenize.Token], entityIterDir Direction) (bool, []tokenize.Token) { - // Reset iterators before and comparing - entityTokensIter.Reset() - defer entityTokensIter.Reset() - currTextPos := textIter.CurrPos() - defer textIter.SetPos(currTextPos) - - // By default, we assume an entity - var isEntity bool = true - - for entityTokensIter.Next() { - // Reset - isEntity = true - - entityIter := iterator.New(entityTokensIter.CurrElem()) - - switch entityIterDir { - - // -> - case DirPos: - for entityIter.Next() { - // Check if text token matches the entity token - if !eqItersElems(textIter, entityIter) { - isEntity = false - } - - // Advance text iterator to compare against - textIter.Next() - } - - // <- - case DirNeg: - // We scan backwards and start from top - entityIter.SetPos(entityIter.Len()) // [1, 2, 3],(4) - for entityIter.Prev() { // [1, 2, (3)] - if !eqItersElems(textIter, entityIter) { - isEntity = false - } - - textIter.Prev() - } - } - - if isEntity { - return true, entityTokensIter.CurrElem() - } - - // Reset to compare with next entity tokens - textIter.SetPos(currTextPos) - } - - return false, []tokenize.Token{} -} - -func eqItersElems(x *iterator.Iterator[tokenize.Token], y *iterator.Iterator[tokenize.Token]) bool { - return x.CurrElem() == y.CurrElem() -} diff --git a/internal/comp/comp_test.go b/internal/comp/comp_test.go deleted file mode 100644 index bd40a9f..0000000 --- a/internal/comp/comp_test.go +++ /dev/null @@ -1,212 +0,0 @@ -package comp_test - -import ( - "reflect" - "testing" - - "github.com/ndabAP/assocentity/v13/internal/comp" - "github.com/ndabAP/assocentity/v13/internal/iterator" - "github.com/ndabAP/assocentity/v13/tokenize" -) - -func TestTextWithEntity(t *testing.T) { - type args struct { - textIter *iterator.Iterator[tokenize.Token] - entityTokensIter *iterator.Iterator[[]tokenize.Token] - dir comp.Direction - } - tests := []struct { - name string - args args - want bool - want1 []tokenize.Token - }{ - { - name: "no entity", - args: args{ - textIter: iterator.New([]tokenize.Token{ - { - PoS: tokenize.ADP, - Text: "Without", - }, - { - PoS: tokenize.NOUN, - Text: "Mona", - }, - { - PoS: tokenize.PRT, - Text: "'s'", - }, - { - PoS: tokenize.NOUN, - Text: "help", - }, - { - PoS: 
tokenize.PUNCT, - Text: ",", - }, - { - PoS: tokenize.PRON, - Text: "I", - }, - { - PoS: tokenize.VERB, - Text: "'d'", - }, - { - PoS: tokenize.VERB, - Text: "be", - }, - { - PoS: tokenize.DET, - Text: "a", - }, - { - PoS: tokenize.ADJ, - Text: "dead", - }, - { - PoS: tokenize.NOUN, - Text: "man", - }, - }), - entityTokensIter: iterator.New([][]tokenize.Token{ - { - { - PoS: tokenize.NOUN, - Text: "Alex", - }, - }, - }), - dir: comp.DirPos, - }, - want: false, - want1: make([]tokenize.Token, 0), - }, - { - name: "one entity", - args: args{ - textIter: iterator.New([]tokenize.Token{ - { - PoS: tokenize.ADP, - Text: "Without", - }, - { - PoS: tokenize.NOUN, - Text: "Mona", - }, - { - PoS: tokenize.PRT, - Text: "'s'", - }, - { - PoS: tokenize.NOUN, - Text: "help", - }, - { - PoS: tokenize.PUNCT, - Text: ",", - }, - { - PoS: tokenize.PRON, - Text: "I", - }, - { - PoS: tokenize.VERB, - Text: "'d'", - }, - { - PoS: tokenize.VERB, - Text: "be", - }, - { - PoS: tokenize.DET, - Text: "a", - }, - { - PoS: tokenize.ADJ, - Text: "dead", - }, - { - PoS: tokenize.NOUN, - Text: "man", - }, - }).SetPos(1), - entityTokensIter: iterator.New([][]tokenize.Token{ - { - tokenize.Token{ - PoS: tokenize.NOUN, - Text: "Mona", - }, - }, - }), - dir: comp.DirPos, - }, - want: true, - want1: []tokenize.Token{ - { - PoS: tokenize.NOUN, - Text: "Mona", - }, - }, - }, - { - name: "one two tokens long entity", - args: args{ - textIter: iterator.New([]tokenize.Token{ - { - PoS: tokenize.ANY, - Text: "a", - }, - { - PoS: tokenize.ANY, - Text: "a", - }, - { - PoS: tokenize.ANY, - Text: "b", - }, - { - PoS: tokenize.ANY, - Text: "b", - }, - }).SetPos(2), - entityTokensIter: iterator.New([][]tokenize.Token{ - { - tokenize.Token{ - PoS: tokenize.ANY, - Text: "b", - }, - tokenize.Token{ - PoS: tokenize.ANY, - Text: "b", - }, - }, - }), - dir: comp.DirPos, - }, - want: true, - want1: []tokenize.Token{ - { - PoS: tokenize.ANY, - Text: "b", - }, - { - PoS: tokenize.ANY, - Text: "b", - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, got1 := comp.TextWithEntities(tt.args.textIter, tt.args.entityTokensIter, tt.args.dir) - if got != tt.want { - t.Errorf("TextWithEntity() got = %v, want %v", got, tt.want) - } - if !reflect.DeepEqual(got1, tt.want1) { - t.Errorf("TextWithEntity() got1 = %v, want %v", got1, tt.want1) - } - }) - } -} diff --git a/internal/iterator/iterator.go b/internal/iterator/iterator.go deleted file mode 100644 index 3175502..0000000 --- a/internal/iterator/iterator.go +++ /dev/null @@ -1,77 +0,0 @@ -package iterator - -type Iterator[T any] struct { - el T - elems []T - len int - pos int -} - -func New[T any](elems []T) *Iterator[T] { - return &Iterator[T]{ - *new(T), - elems, - len(elems), - -1, - } -} - -func (it *Iterator[T]) Next() bool { - if it.pos+1 >= it.len { - return false - } - it.pos++ - it.el = it.elems[it.pos] - return true -} - -func (it *Iterator[T]) Prev() bool { - if it.pos-1 < 0 { - return false - } - it.pos-- - it.el = it.elems[it.pos] - return true -} - -func (it *Iterator[T]) Reset() *Iterator[T] { - it.pos = -1 - it.el = *new(T) - return it -} - -func (it *Iterator[T]) CurrPos() int { - return it.pos -} - -func (it *Iterator[T]) CurrElem() T { - return it.el -} - -func (it *Iterator[T]) Len() int { - return it.len -} - -func (it *Iterator[T]) SetPos(pos int) *Iterator[T] { - it.pos = pos - it.setEl() - return it -} - -func (it *Iterator[T]) Rewind(pos int) *Iterator[T] { - it.pos -= pos - it.setEl() - return it -} - -func (it *Iterator[T]) Forward(pos 
int) *Iterator[T] { - it.pos += pos - it.setEl() - return it -} - -func (it *Iterator[T]) setEl() { - if len(it.elems)-1 > it.pos && it.pos >= 0 { - it.el = it.elems[it.pos] - } -} diff --git a/internal/iterator/iterator_test.go b/internal/iterator/iterator_test.go deleted file mode 100644 index c4f441d..0000000 --- a/internal/iterator/iterator_test.go +++ /dev/null @@ -1,67 +0,0 @@ -package iterator_test - -import ( - "testing" - - "github.com/ndabAP/assocentity/v13/internal/iterator" -) - -var testElems = []int{1, 2, 3, 3, 1, 5, 6} - -func TestNav(t *testing.T) { - it := iterator.New(testElems) - - it.Next() - if it.CurrElem() != testElems[0] { - t.Errorf("CurrElem() got = %v, want = %v", it.CurrElem(), testElems[0]) - } - - it.Prev() - if it.CurrElem() != testElems[0] { - t.Errorf("CurrElem() got = %v, want = %v", it.CurrElem(), testElems[0]) - } - - it.Forward(1) - if it.CurrElem() != testElems[1] { - t.Errorf("CurrElem() got = %v, want = %v", it.CurrElem(), testElems[1]) - } - - it.Rewind(1) - if it.CurrElem() != testElems[0] { - t.Errorf("CurrElem() got = %v, want = %v", it.CurrElem(), testElems[0]) - } - - it.Reset() - // We need an independent counter - i := 0 - for it.Next() { - if testElems[i] != it.CurrElem() { - t.Errorf("CurrElem() got = %v, want = %v", it.CurrElem(), testElems[i]) - } - i++ - } - - it.SetPos(len(testElems)) - i = len(testElems) - 1 - for it.Prev() { - if testElems[i] != it.CurrElem() { - t.Errorf("CurrElem() got = %v, want = %v", it.CurrElem(), testElems[i]) - } - i-- - } -} - -func TestCurrElem(t *testing.T) { - it := iterator.New(testElems) - - it.SetPos(1) - if it.CurrElem() != testElems[1] { - t.Errorf("SetPos(1) got = %v, want = %v", it.CurrElem(), testElems[1]) - } - - it.Reset() - it.Next() - if it.CurrElem() != testElems[0] { - t.Errorf("Reset() got = %v, want = %v", it.CurrElem(), testElems[1]) - } -} diff --git a/internal/pos/pos_determ.go b/internal/pos/pos_determ.go deleted file mode 100644 index 6c628dc..0000000 --- a/internal/pos/pos_determ.go +++ /dev/null @@ -1,45 +0,0 @@ -package pos - -import ( - "github.com/ndabAP/assocentity/v13/internal/comp" - "github.com/ndabAP/assocentity/v13/internal/iterator" - "github.com/ndabAP/assocentity/v13/tokenize" -) - -// poSDetermer represents the default part of speech determinator -type poSDetermer struct{ poS tokenize.PoS } - -// NewPoSDetermer returns a new default part of speech determinator -func NewPoSDetermer(poS tokenize.PoS) poSDetermer { return poSDetermer{poS} } - -// DetermPoS deterimantes if a part of speech tag should be kept. It always -// appends entities -func (dps poSDetermer) DetermPoS(textTokens []tokenize.Token, entityTokens [][]tokenize.Token) []tokenize.Token { - // If any part of speech, no need to determinate - if dps.poS == tokenize.ANY { - return textTokens - } - - var determTokens []tokenize.Token - - textIter := iterator.New(textTokens) - entityTokensIter := iterator.New(entityTokens) - - for textIter.Next() { - currTextPos := textIter.CurrPos() - isEntity, entity := comp.TextWithEntities(textIter, entityTokensIter, comp.DirPos) - if isEntity { - textIter.SetPos(currTextPos + len(entity)) - // Entity is always kept - determTokens = append(determTokens, entity...) 
- continue - } - - // Non-entity tokens - if textIter.CurrElem().PoS&dps.poS != 0 { - determTokens = append(determTokens, textIter.CurrElem()) - } - } - - return determTokens -} diff --git a/internal/pos/pos_determ_test.go b/internal/pos/pos_determ_test.go deleted file mode 100644 index 166ffb5..0000000 --- a/internal/pos/pos_determ_test.go +++ /dev/null @@ -1,171 +0,0 @@ -package pos - -import ( - "reflect" - "testing" - - "github.com/ndabAP/assocentity/v13/tokenize" -) - -func TestPoSDetermer_DetermPoS(t *testing.T) { - type fields struct { - poS tokenize.PoS - } - type args struct { - textTokens []tokenize.Token - entityTokens [][]tokenize.Token - } - tests := []struct { - name string - fields fields - args args - want []tokenize.Token - }{ - { - name: "any", - fields: fields{ - poS: tokenize.ANY, - }, - args: args{ - textTokens: []tokenize.Token{ - {PoS: tokenize.NOUN, Text: "Cold"}, - {PoS: tokenize.ADP, Text: "as"}, - {PoS: tokenize.DET, Text: "a"}, - {PoS: tokenize.NOUN, Text: "gun"}, - }, - entityTokens: [][]tokenize.Token{ - { - { - Text: "Max", - PoS: tokenize.NOUN, - }, - { - Text: "Payne", - PoS: tokenize.NOUN, - }, - }, - }, - }, - want: []tokenize.Token{ - {PoS: tokenize.NOUN, Text: "Cold"}, - {PoS: tokenize.ADP, Text: "as"}, - {PoS: tokenize.DET, Text: "a"}, - {PoS: tokenize.NOUN, Text: "gun"}, - }, - }, - { - name: "noun", - fields: fields{ - poS: tokenize.NOUN, - }, - args: args{ - textTokens: []tokenize.Token{ - {PoS: tokenize.NOUN, Text: "Cold"}, - {PoS: tokenize.ADP, Text: "as"}, - {PoS: tokenize.DET, Text: "a"}, - {PoS: tokenize.NOUN, Text: "gun"}, - }, - entityTokens: [][]tokenize.Token{ - { - { - Text: "Max", - PoS: tokenize.NOUN, - }, - { - Text: "Payne", - PoS: tokenize.NOUN, - }, - }, - }, - }, - want: []tokenize.Token{ - {PoS: tokenize.NOUN, Text: "Cold"}, - {PoS: tokenize.NOUN, Text: "gun"}, - }, - }, - { - name: "noun, adposition", - fields: fields{ - poS: tokenize.NOUN | tokenize.ADP, - }, - args: args{ - textTokens: []tokenize.Token{ - {PoS: tokenize.NOUN, Text: "Cold"}, - {PoS: tokenize.ADP, Text: "as"}, - {PoS: tokenize.DET, Text: "a"}, - {PoS: tokenize.NOUN, Text: "gun"}, - }, - entityTokens: [][]tokenize.Token{ - { - { - Text: "Max", - PoS: tokenize.NOUN, - }, - { - Text: "Payne", - PoS: tokenize.NOUN, - }, - }, - }, - }, - want: []tokenize.Token{ - {PoS: tokenize.NOUN, Text: "Cold"}, - {PoS: tokenize.ADP, Text: "as"}, - {PoS: tokenize.NOUN, Text: "gun"}, - }, - }, - { - name: "skip entity", - fields: fields{ - poS: tokenize.VERB, - }, - args: args{ - textTokens: []tokenize.Token{ - {PoS: tokenize.VERB, Text: "Relax"}, - {PoS: tokenize.PUNCT, Text: ","}, - {PoS: tokenize.NOUN, Text: "Max"}, - {PoS: tokenize.PUNCT, Text: "."}, - {PoS: tokenize.PRON, Text: "You"}, - {PoS: tokenize.VERB, Text: "'re"}, - {PoS: tokenize.DET, Text: "a"}, - {PoS: tokenize.ADJ, Text: "nice"}, - {PoS: tokenize.NOUN, Text: "guy"}, - {PoS: tokenize.PUNCT, Text: "."}, - }, - entityTokens: [][]tokenize.Token{ - { - { - Text: "Max", - PoS: tokenize.NOUN, - }, - { - Text: "Payne", - PoS: tokenize.NOUN, - }, - }, - { - { - Text: "Max", - PoS: tokenize.NOUN, - }, - }, - }, - }, - want: []tokenize.Token{ - {PoS: tokenize.VERB, Text: "Relax"}, - {PoS: tokenize.NOUN, Text: "Max"}, - {PoS: tokenize.VERB, Text: "'re"}, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - dps := poSDetermer{ - poS: tt.fields.poS, - } - if got := dps.DetermPoS(tt.args.textTokens, tt.args.entityTokens); !reflect.DeepEqual(got, tt.want) { - t.Errorf("NLPPoSDetermer.DetermPoS() = %v, 
want %v", got, tt.want) - } - }) - } -} diff --git a/nlp/errors.go b/nlp/errors.go new file mode 100644 index 0000000..9fa3b88 --- /dev/null +++ b/nlp/errors.go @@ -0,0 +1,5 @@ +package nlp + +import "errors" + +var ErrMaxRetries = errors.New("max retries reached") diff --git a/nlp/tokenize.go b/nlp/tokenize.go deleted file mode 100644 index 2023a54..0000000 --- a/nlp/tokenize.go +++ /dev/null @@ -1,143 +0,0 @@ -package nlp - -import ( - "context" - "errors" - "time" - - language "cloud.google.com/go/language/apiv1" - "github.com/googleapis/gax-go/v2/apierror" - "github.com/ndabAP/assocentity/v13/tokenize" - "google.golang.org/api/option" - "google.golang.org/genproto/googleapis/api/error_reason" - languagepb "google.golang.org/genproto/googleapis/cloud/language/v1" -) - -var ( - ErrMaxRetries = errors.New("max retries reached") -) - -var poSMap = map[languagepb.PartOfSpeech_Tag]tokenize.PoS{ - languagepb.PartOfSpeech_ADJ: tokenize.ADJ, - languagepb.PartOfSpeech_ADP: tokenize.ADP, - languagepb.PartOfSpeech_ADV: tokenize.ADV, - languagepb.PartOfSpeech_AFFIX: tokenize.AFFIX, - languagepb.PartOfSpeech_CONJ: tokenize.CONJ, - languagepb.PartOfSpeech_DET: tokenize.DET, - languagepb.PartOfSpeech_NOUN: tokenize.NOUN, - languagepb.PartOfSpeech_NUM: tokenize.NUM, - languagepb.PartOfSpeech_PRON: tokenize.PRON, - languagepb.PartOfSpeech_PRT: tokenize.PRT, - languagepb.PartOfSpeech_PUNCT: tokenize.PUNCT, - languagepb.PartOfSpeech_UNKNOWN: tokenize.UNKN, - languagepb.PartOfSpeech_VERB: tokenize.VERB, - languagepb.PartOfSpeech_X: tokenize.X, -} - -// AutoLang tries to automatically recognize the language -var AutoLang string = "auto" - -// NLPTokenizer tokenizes a text using Google NLP -type NLPTokenizer struct { - credsFilename string - lang string -} - -// NewNLPTokenizer returns a new NLP tokenizer instance. Note that NLPTokenizer -// has a built-in retrier -func NewNLPTokenizer(credentialsFilename string, lang string) tokenize.Tokenizer { - return NLPTokenizer{ - credsFilename: credentialsFilename, - lang: lang, - } -} - -// Tokenize tokenizes a text -func (nlp NLPTokenizer) Tokenize(ctx context.Context, text string) ([]tokenize.Token, error) { - res, err := nlp.req(ctx, text) - if err != nil { - return []tokenize.Token{}, err - } - - tokens := make([]tokenize.Token, 0) - for _, tok := range res.GetTokens() { - if _, ok := poSMap[tok.PartOfSpeech.Tag]; !ok { - return tokens, errors.New("can't find pos match") - } - - tokens = append(tokens, tokenize.Token{ - PoS: poSMap[tok.PartOfSpeech.Tag], - Text: tok.GetText().GetContent(), - }) - } - return tokens, nil -} - -// req sends a request to the Google server. 
It retries if the API rate limited -// is reached -func (nlp NLPTokenizer) req(ctx context.Context, text string) (*languagepb.AnnotateTextResponse, error) { - client, err := language.NewClient(ctx, option.WithCredentialsFile(nlp.credsFilename)) - if err != nil { - return &languagepb.AnnotateTextResponse{}, err - } - - defer client.Close() - - doc := &languagepb.Document{ - Source: &languagepb.Document_Content{ - Content: text, - }, - Type: languagepb.Document_PLAIN_TEXT, - } - // Set the desired language if not auto - if nlp.lang != AutoLang { - doc.Language = nlp.lang - } - - // Google rate limit timeout - const apiRateTimeout = 1.0 // In Minutes - var ( - // Google errors - apiErr *apierror.APIError - errReasonRateLimitExceeded = error_reason.ErrorReason_RATE_LIMIT_EXCEEDED.String() - - delay = apiRateTimeout - delayMult = 1.05 // Delay multiplier - retries = 0 - ) - const ( - delayGrowth = 1.05 // Delay growth rate - maxRetries = 6 - ) - // Retry request up to maxRetries times if rate limit exceeded with an - // growing delay - for { - if retries >= maxRetries { - return &languagepb.AnnotateTextResponse{}, ErrMaxRetries - } - - // Do the actual request - res, err := client.AnnotateText(ctx, &languagepb.AnnotateTextRequest{ - Document: doc, - Features: &languagepb.AnnotateTextRequest_Features{ - ExtractSyntax: true, - }, - EncodingType: languagepb.EncodingType_UTF8, - }) - // Check for rate limit exceeded error to retry - if errors.As(err, &apiErr) { - if apiErr.Reason() == errReasonRateLimitExceeded { - time.Sleep(time.Minute * time.Duration(delay)) - - // Retryer logic - retries += 1 - delay *= delayMult - delayMult *= delayGrowth - - continue - } - } else { - return res, err - } - } -} diff --git a/nlp/tokenize_test.go b/nlp/tokenize_test.go deleted file mode 100644 index 8fe6896..0000000 --- a/nlp/tokenize_test.go +++ /dev/null @@ -1,77 +0,0 @@ -package nlp_test - -import ( - "context" - "os" - "reflect" - "testing" - - "github.com/joho/godotenv" - "github.com/ndabAP/assocentity/v13/nlp" - "github.com/ndabAP/assocentity/v13/tokenize" -) - -func TestTokenize(t *testing.T) { - if testing.Short() { - t.SkipNow() - } - - if err := godotenv.Load("../.env"); err != nil { - t.Fatal(err) - } - - credentialsFile := os.Getenv("GOOGLE_NLP_SERVICE_ACCOUNT_FILE_LOCATION") - - tests := []struct { - text string - want []tokenize.Token - wantErr bool - }{ - { - text: "Punchinello was burning to get me", - want: []tokenize.Token{ - { - Text: "Punchinello", - PoS: tokenize.NOUN, - }, - { - Text: "was", - PoS: tokenize.VERB, - }, - { - Text: "burning", - PoS: tokenize.VERB, - }, - { - Text: "to", - PoS: tokenize.PRT, - }, - { - Text: "get", - PoS: tokenize.VERB, - }, - { - Text: "me", - PoS: tokenize.PRON, - }, - }, - wantErr: false, - }, - } - for _, tt := range tests { - t.Run("", func(t *testing.T) { - nlp := nlp.NewNLPTokenizer( - credentialsFile, - nlp.AutoLang, - ) - got, err := nlp.Tokenize(context.Background(), tt.text) - if (err != nil) != tt.wantErr { - t.Errorf("nlp.Tokenize() error = %v, wantErr %v", err, tt.wantErr) - return - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("nlp.Tokenize() = %v, want %v", got, tt.want) - } - }) - } -} diff --git a/nlp/tokenizer.go b/nlp/tokenizer.go new file mode 100644 index 0000000..629349a --- /dev/null +++ b/nlp/tokenizer.go @@ -0,0 +1,141 @@ +package nlp + +import ( + "context" + "errors" + "time" + + language "cloud.google.com/go/language/apiv1beta2" + "cloud.google.com/go/language/apiv1beta2/languagepb" + 
"github.com/googleapis/gax-go/v2/apierror" + "github.com/ndabAP/assocentity/v15/tokenize" + "google.golang.org/api/option" + "google.golang.org/genproto/googleapis/api/error_reason" +) + +// AutoLang tries to automatically recognize the language +var AutoLang string = "auto" + +// nlp tokenizers a text using Google NLP +type nlp struct { + creds string + lang string +} + +// NewNLP returns a new NLP tokenizerr instance. Note that NLPTokenizer +// has a built-in retrier +func NewNLP(creds string, lang string) tokenize.Tokenizer { + return nlp{ + creds: creds, + lang: lang, + } +} + +// Tokenize tokenizers a text +func (nlp nlp) Tokenize(ctx context.Context, text string) ([]tokenize.Token, error) { + res, err := nlp.req(ctx, text) + if err != nil { + return []tokenize.Token{}, err + } + + tokens := make([]tokenize.Token, 0) + for _, tok := range res.GetTokens() { + var ( + text = tok.GetText().GetContent() + pos = tokenize.PartOfSpeech{ + Aspect: tokenize.Aspect(tok.PartOfSpeech.GetAspect()), + Case: tokenize.Case(tok.PartOfSpeech.GetCase()), + Form: tokenize.Form(tok.PartOfSpeech.GetForm()), + Gender: tokenize.Gender(tok.PartOfSpeech.GetGender()), + Mood: tokenize.Mood(tok.PartOfSpeech.GetMood()), + Number: tokenize.Number(tok.PartOfSpeech.GetNumber()), + Proper: tokenize.Proper(tok.PartOfSpeech.GetProper()), + Person: tokenize.Person(tok.PartOfSpeech.GetPerson()), + Tag: tokenize.Tag(tok.PartOfSpeech.GetTag()), + Tense: tokenize.Tense(tok.PartOfSpeech.GetTense()), + Voice: tokenize.Voice(tok.PartOfSpeech.GetVoice()), + } + ) + tokens = append(tokens, tokenize.Token{ + PartOfSpeech: pos, + Text: text, + }) + } + + return tokens, nil +} + +// req sends a request to the Google server. It retries if the API rate limited +// is reached +func (nlp nlp) req(ctx context.Context, text string) (*languagepb.AnnotateTextResponse, error) { + client, err := language.NewClient(ctx, option.WithCredentialsFile(nlp.creds)) + if err != nil { + return &languagepb.AnnotateTextResponse{}, err + } + defer client.Close() + + doc := &languagepb.Document{ + Source: &languagepb.Document_Content{ + Content: text, + }, + Type: languagepb.Document_PLAIN_TEXT, + } + // Set the desired language if not auto + if nlp.lang != AutoLang { + doc.Language = nlp.lang + } + + // Retry request up to maxretries times if rate limit exceeded with an + // growing delay + const ( + growth = 1.05 // Delay growth rate + maxretries = 6 + timeout = 1.0 // In Minutes + ) + var ( + // Retrier + delay = timeout + mult = 1.05 // Delay multiplier + retries = 0 + ) + + // Google rate limit timeout + errRateLimitExceededStr := error_reason.ErrorReason_RATE_LIMIT_EXCEEDED.String() + for { + select { + case <-ctx.Done(): + return &languagepb.AnnotateTextResponse{}, ctx.Err() + default: + } + + // Retrier exhausted + if retries >= maxretries { + return &languagepb.AnnotateTextResponse{}, ErrMaxRetries + } + + // Do the actual request + res, err := client.AnnotateText(ctx, &languagepb.AnnotateTextRequest{ + Document: doc, + Features: &languagepb.AnnotateTextRequest_Features{ + ExtractSyntax: true, + }, + EncodingType: languagepb.EncodingType_UTF8, + }) + // Check for rate limit exceeded error to eventually retry + var e *apierror.APIError + if errors.As(err, &e) { + if e.Reason() == errRateLimitExceededStr { + time.Sleep(time.Minute * time.Duration(delay)) + + // Retryer logic + retries++ + delay *= mult + mult *= growth + + continue + } + } + + return res, err + } +} diff --git a/normalizer.go b/normalizer.go deleted file mode 100644 index 
diff --git a/normalizer.go b/normalizer.go deleted file mode 100644 index c99b9fb..0000000 --- a/normalizer.go +++ /dev/null @@ -1,62 +0,0 @@ -package assocentity - -import ( - "strings" - - "github.com/ndabAP/assocentity/v13/tokenize" -) - -// Normalizer normalizes tokens like lower casing them -type Normalizer func(tokenize.Token) tokenize.Token - -// HumandReadableNormalizer normalizes tokens through lower casing them and -// replacing them with their synonyms -var HumandReadableNormalizer Normalizer = func(tok tokenize.Token) tokenize.Token { - t := tokenize.Token{ - PoS: tok.PoS, - Text: strings.ToLower(tok.Text), - } - - // This can increase the result data quality and could include more synonyms - switch tok.Text { - case "&": - t.Text = "and" - } - - return t -} - -// Normalize normalizes tokens with provided normalizer -func Normalize(dists map[tokenize.Token][]float64, norm Normalizer) { - for tok, d := range dists { - t := norm(tok) - - // Check if text is the same as non-normalized - if t == tok { - continue - } - if _, ok := dists[t]; ok { - dists[t] = append(dists[tok], d...) - } else { - dists[t] = d - } - - delete(dists, tok) - } -} - -// Threshold excludes results that are below the given threshold. The threshold -// is described through the amount of distances per token relative to the total -// amount of tokens -func Threshold(dists map[tokenize.Token][]float64, threshold float64) { - // Length of dists is amount of total tokens - distsN := len(dists) - for tok, d := range dists { - dN := len(d) - // Amount of distances per token relative to the amount of all tokens - t := (float64(dN) / float64(distsN)) * 100 - if t < threshold { - delete(dists, tok) - } - } -} diff --git a/source.go b/source.go new file mode 100644 index 0000000..58f82e8 --- /dev/null +++ b/source.go @@ -0,0 +1,28 @@ +package assocentity + +import "slices" + +type ( + // source wraps entities and texts + source struct { + Entities []string + Texts []string + } +) + +// NewSource returns a new source, consisting of entities and texts.
Duplicate +// entities are removed +func NewSource(entities, texts []string) source { + // De-duplicate entities + e := make([]string, 0) + for _, entity := range entities { + if !slices.Contains(e, entity) { + e = append(e, entity) + } + } + + return source{ + Entities: e, + Texts: texts, + } +} diff --git a/tokenize/interface.go b/tokenize/interface.go new file mode 100644 index 0000000..e7152c1 --- /dev/null +++ b/tokenize/interface.go @@ -0,0 +1,10 @@ +package tokenize + +import ( + "context" +) + +// Tokenizer tokenizes a text and entities +type Tokenizer interface { + Tokenize(ctx context.Context, text string) ([]Token, error) +} diff --git a/tokenize/pos.go b/tokenize/pos.go new file mode 100644 index 0000000..59f5457 --- /dev/null +++ b/tokenize/pos.go @@ -0,0 +1,16 @@ +package tokenize + +type PartOfSpeech struct { + Aspect Aspect + Case Case + Form Form + Gender Gender + Mood Mood + Number Number + Person Person + Proper Proper + Reciprocity Reciprocity + Tag Tag + Tense Tense + Voice Voice +} diff --git a/tokenize/pos_aspect.go b/tokenize/pos_aspect.go new file mode 100644 index 0000000..c0fab4e --- /dev/null +++ b/tokenize/pos_aspect.go @@ -0,0 +1,11 @@ +package tokenize + +type Aspect int + +// Aspect +const ( + ASPECT_UNKOWN Aspect = iota + ASPECT_PERFECTIVE + ASPECT_IMPERFECTIVE + ASPECT_PROGRESSIVE +) diff --git a/tokenize/pos_case.go b/tokenize/pos_case.go new file mode 100644 index 0000000..b4b6cc1 --- /dev/null +++ b/tokenize/pos_case.go @@ -0,0 +1,21 @@ +package tokenize + +type Case int + +const ( + CASE_UNKNOWN Case = iota + CASE_ACCUSATIVE + CASE_ADVERBIAL + CASE_COMPLEMENTIVE + CASE_DATIVE + CASE_GENITIVE + CASE_INSTRUMENTAL + CASE_LOCATIVE + CASE_NOMINATIVE + CASE_OBLIQUE + CASE_PARTITIVE + CASE_PREPOSITIONAL + CASE_REFLEXIVE_CASE + CASE_RELATIVE_CASE + CASE_VOCATIVE +) diff --git a/tokenize/pos_form.go b/tokenize/pos_form.go new file mode 100644 index 0000000..660000a --- /dev/null +++ b/tokenize/pos_form.go @@ -0,0 +1,18 @@ +package tokenize + +type Form int + +const ( + FORM_UNKNOWN Form = iota + FORM_ADNOMIAL + FORM_AUXILIARY + FORM_COMPLEMENTIZER + FORM_FINAL_ENDING + FORM_GERUND + FORM_REALIS + FORM_IRREALIS + FORM_SHORT + FORM_LONG + FORM_ORDER + FORM_SPECIFIC +) diff --git a/tokenize/pos_gender.go b/tokenize/pos_gender.go new file mode 100644 index 0000000..71bf966 --- /dev/null +++ b/tokenize/pos_gender.go @@ -0,0 +1,10 @@ +package tokenize + +type Gender int + +const ( + GENDER_UNKNOWN Gender = iota + GENDER_FEMININE + GENDER_MASCULINE + GENDER_NEUTER +) diff --git a/tokenize/pos_mood.go b/tokenize/pos_mood.go new file mode 100644 index 0000000..a409450 --- /dev/null +++ b/tokenize/pos_mood.go @@ -0,0 +1,13 @@ +package tokenize + +type Mood int + +const ( + MOOD_UNKNOWN Mood = iota + MOOD_CONDITIONAL_MOOD + MOOD_IMPERATIVE + MOOD_INDICATIVE + MOOD_INTERROGATIVE + MOOD_JUSSIVE + MOOD_SUBJUNCTIVE +) diff --git a/tokenize/pos_number.go b/tokenize/pos_number.go new file mode 100644 index 0000000..dbc4d7e --- /dev/null +++ b/tokenize/pos_number.go @@ -0,0 +1,10 @@ +package tokenize + +type Number int + +const ( + NUMBER_UNKNOWN Number = iota + NUMBER_SINGULAR + NUMBER_PLURAL + NUMBER_DUAL +) diff --git a/tokenize/pos_person.go b/tokenize/pos_person.go new file mode 100644 index 0000000..fdd21e3 --- /dev/null +++ b/tokenize/pos_person.go @@ -0,0 +1,11 @@ +package tokenize + +type Person int + +const ( + PERSON_UNKNOWN Person = iota + PERSON_FIRST + PERSON_SECOND + PERSON_THIRD + PERSON_REFLEXIVE +) diff --git a/tokenize/pos_proper.go b/tokenize/pos_proper.go 
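A short sketch of the de-duplication guarantee of `NewSource` above; the entity values are illustrative:

```go
package main

import (
	"fmt"

	assocentity "github.com/ndabAP/assocentity/v15"
)

func main() {
	source := assocentity.NewSource(
		// "Max Payne" is passed twice
		[]string{"Max Payne", "Payne", "Max Payne"},
		[]string{"Relax, Max. You're a nice guy."},
	)

	// Prints [Max Payne Payne]: the duplicate entity was dropped
	fmt.Println(source.Entities)
}
```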
diff --git a/tokenize/interface.go b/tokenize/interface.go new file mode 100644 index 0000000..e7152c1 --- /dev/null +++ b/tokenize/interface.go @@ -0,0 +1,10 @@ +package tokenize + +import ( + "context" +) + +// Tokenizer tokenizes a text (used for both texts and entities) +type Tokenizer interface { + Tokenize(ctx context.Context, text string) ([]Token, error) +} diff --git a/tokenize/pos.go b/tokenize/pos.go new file mode 100644 index 0000000..59f5457 --- /dev/null +++ b/tokenize/pos.go @@ -0,0 +1,16 @@ +package tokenize + +type PartOfSpeech struct { + Aspect Aspect + Case Case + Form Form + Gender Gender + Mood Mood + Number Number + Person Person + Proper Proper + Reciprocity Reciprocity + Tag Tag + Tense Tense + Voice Voice +} diff --git a/tokenize/pos_aspect.go b/tokenize/pos_aspect.go new file mode 100644 index 0000000..c0fab4e --- /dev/null +++ b/tokenize/pos_aspect.go @@ -0,0 +1,11 @@ +package tokenize + +type Aspect int + +// Aspect +const ( + ASPECT_UNKNOWN Aspect = iota + ASPECT_PERFECTIVE + ASPECT_IMPERFECTIVE + ASPECT_PROGRESSIVE +) diff --git a/tokenize/pos_case.go b/tokenize/pos_case.go new file mode 100644 index 0000000..b4b6cc1 --- /dev/null +++ b/tokenize/pos_case.go @@ -0,0 +1,21 @@ +package tokenize + +type Case int + +const ( + CASE_UNKNOWN Case = iota + CASE_ACCUSATIVE + CASE_ADVERBIAL + CASE_COMPLEMENTIVE + CASE_DATIVE + CASE_GENITIVE + CASE_INSTRUMENTAL + CASE_LOCATIVE + CASE_NOMINATIVE + CASE_OBLIQUE + CASE_PARTITIVE + CASE_PREPOSITIONAL + CASE_REFLEXIVE_CASE + CASE_RELATIVE_CASE + CASE_VOCATIVE +) diff --git a/tokenize/pos_form.go b/tokenize/pos_form.go new file mode 100644 index 0000000..660000a --- /dev/null +++ b/tokenize/pos_form.go @@ -0,0 +1,18 @@ +package tokenize + +type Form int + +const ( + FORM_UNKNOWN Form = iota + FORM_ADNOMIAL + FORM_AUXILIARY + FORM_COMPLEMENTIZER + FORM_FINAL_ENDING + FORM_GERUND + FORM_REALIS + FORM_IRREALIS + FORM_SHORT + FORM_LONG + FORM_ORDER + FORM_SPECIFIC +) diff --git a/tokenize/pos_gender.go b/tokenize/pos_gender.go new file mode 100644 index 0000000..71bf966 --- /dev/null +++ b/tokenize/pos_gender.go @@ -0,0 +1,10 @@ +package tokenize + +type Gender int + +const ( + GENDER_UNKNOWN Gender = iota + GENDER_FEMININE + GENDER_MASCULINE + GENDER_NEUTER +) diff --git a/tokenize/pos_mood.go b/tokenize/pos_mood.go new file mode 100644 index 0000000..a409450 --- /dev/null +++ b/tokenize/pos_mood.go @@ -0,0 +1,13 @@ +package tokenize + +type Mood int + +const ( + MOOD_UNKNOWN Mood = iota + MOOD_CONDITIONAL_MOOD + MOOD_IMPERATIVE + MOOD_INDICATIVE + MOOD_INTERROGATIVE + MOOD_JUSSIVE + MOOD_SUBJUNCTIVE +) diff --git a/tokenize/pos_number.go b/tokenize/pos_number.go new file mode 100644 index 0000000..dbc4d7e --- /dev/null +++ b/tokenize/pos_number.go @@ -0,0 +1,10 @@ +package tokenize + +type Number int + +const ( + NUMBER_UNKNOWN Number = iota + NUMBER_SINGULAR + NUMBER_PLURAL + NUMBER_DUAL +) diff --git a/tokenize/pos_person.go b/tokenize/pos_person.go new file mode 100644 index 0000000..fdd21e3 --- /dev/null +++ b/tokenize/pos_person.go @@ -0,0 +1,11 @@ +package tokenize + +type Person int + +const ( + PERSON_UNKNOWN Person = iota + PERSON_FIRST + PERSON_SECOND + PERSON_THIRD + PERSON_REFLEXIVE +) diff --git a/tokenize/pos_proper.go b/tokenize/pos_proper.go new file mode 100644 index 0000000..cb9a28e --- /dev/null +++ b/tokenize/pos_proper.go @@ -0,0 +1,9 @@ +package tokenize + +type Proper int + +const ( + PROPER_UNKNOWN Proper = iota + PROPER_PROPER + PROPER_NOT_PROPER +) diff --git a/tokenize/pos_reciprocity.go b/tokenize/pos_reciprocity.go new file mode 100644 index 0000000..b8db567 --- /dev/null +++ b/tokenize/pos_reciprocity.go @@ -0,0 +1,9 @@ +package tokenize + +type Reciprocity int + +const ( + RECIPROCITY_UNKNOWN Reciprocity = iota + RECIPROCITY_RECIPROCAL + RECIPROCITY_NOT_RECIPROCAL +) diff --git a/tokenize/pos_tag.go b/tokenize/pos_tag.go new file mode 100644 index 0000000..6730b05 --- /dev/null +++ b/tokenize/pos_tag.go @@ -0,0 +1,20 @@ +package tokenize + +type Tag int + +const ( + TAG_UNKNOWN Tag = iota + TAG_ADJ + TAG_ADP + TAG_ADV + TAG_AFFIX + TAG_CONJ + TAG_DET + TAG_NOUN + TAG_NUM + TAG_PRON + TAG_PRT + TAG_PUNCT + TAG_VERB + TAG_X +) diff --git a/tokenize/pos_tense.go b/tokenize/pos_tense.go new file mode 100644 index 0000000..e2c134d --- /dev/null +++ b/tokenize/pos_tense.go @@ -0,0 +1,13 @@ +package tokenize + +type Tense int + +const ( + TENSE_UNKNOWN Tense = iota + TENSE_CONDITIONAL + TENSE_FUTURE + TENSE_PAST + TENSE_PRESENT + TENSE_IMPERATIVE + TENSE_PLUPERFECT +) diff --git a/tokenize/pos_voice.go b/tokenize/pos_voice.go new file mode 100644 index 0000000..3a6bb04 --- /dev/null +++ b/tokenize/pos_voice.go @@ -0,0 +1,10 @@ +package tokenize + +type Voice int + +const ( + VOICE_UNKNOWN Voice = iota + VOICE_ACTIVE + VOICE_CAUSATIVE + VOICE_PASSIVE +) diff --git a/tokenize/token.go b/tokenize/token.go new file mode 100644 index 0000000..90584db --- /dev/null +++ b/tokenize/token.go @@ -0,0 +1,7 @@ +package tokenize + +// Token represents a tokenized text unit +type Token struct { + PartOfSpeech PartOfSpeech + Text string +} diff --git a/tokenize/tokenize.go b/tokenize/tokenize.go deleted file mode 100644 index ad8e637..0000000 --- a/tokenize/tokenize.go +++ /dev/null @@ -1,77 +0,0 @@ -package tokenize - -import ( - "context" -) - -// Part of speech -type PoS int - -const ( - ANY = ADJ | ADP | ADV | AFFIX | CONJ | DET | NOUN | NUM | PRON | PRT | PUNCT | UNKN | VERB | X - - UNKN PoS = 1 << iota // Unknown - X // Other: foreign words, typos, abbreviations - - ADJ // Adjective - ADP // Adposition - ADV // Adverb - AFFIX // Affix - CONJ // Conjunction - DET // Determiner - NOUN // Noun - NUM // Cardinal number - PRON // Pronoun - PRT // Particle or other function word - PUNCT // Punctuation - VERB // Verb (all tenses and modes) -) - -// Tokenizer tokenizes a text and entities -type Tokenizer interface { - Tokenize(ctx context.Context, text string) ([]Token, error) -} - -// Token represents a tokenized text unit -type Token struct { - PoS PoS // Part of speech - Text string // Text -} - -var ( - // PoSMap maps pos strings to types - PoSMap = map[string]PoS{ - "any": ANY, - "adj": ADJ, - "adv": ADV, - "affix": AFFIX, - "conj": CONJ, - "det": DET, - "noun": NOUN, - "num": NUM, - "pron": PRON, - "prt": PRT, - "punct": PUNCT, - "unknown": UNKN, - "verb": VERB, - "x": X, - } - - // PoSMap maps pos types to strings - PoSMapStr = map[PoS]string{ - UNKN: "UNKNOWN", - ADJ: "ADJ", - ADP: "ADP", - ADV: "ADV", - CONJ: "CONJ", - DET: "DET", - NOUN: "NOUN", - NUM: "NUM", - PRON: "PRON", - PRT: "PRT", - PUNCT: "PUNCT", - VERB: "VERB", - X: "X", - AFFIX: "AFFIX", - } -)
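The `tokenize` files above replace the former `PoS` bitmask with the structured `PartOfSpeech` type. A minimal sketch of how a part-of-speech check might look under the new system; the token value is illustrative:

```go
package main

import (
	"fmt"

	"github.com/ndabAP/assocentity/v15/tokenize"
)

func main() {
	tok := tokenize.Token{
		Text: "nice",
		PartOfSpeech: tokenize.PartOfSpeech{
			Tag:    tokenize.TAG_ADJ,
			Proper: tokenize.PROPER_NOT_PROPER,
		},
	}

	// The former bitmask test (tok.PoS&pos != 0) becomes a comparison
	// against the Tag dimension
	if tok.PartOfSpeech.Tag == tokenize.TAG_ADJ {
		fmt.Println(tok.Text, "is an adjective")
	}
}
```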
"container/list" + "context" + "unique" + + "github.com/ndabAP/assocentity/v15/tokenize" +) + +type ( + // Tokens wraps entities with multiple texts + Tokens struct { + entities [][]unique.Handle[tokenize.Token] + texts []*list.List + } +) + +// Tokenize tokenizes the text with the given tokenizer +func (s source) Tokenize(ctx context.Context, tokenizer tokenize.Tokenizer) (Tokens, error) { + var tokens Tokens + + // Entities + tokens.entities = make([][]unique.Handle[tokenize.Token], 0) + for _, entity := range s.Entities { + tok, err := tokenizer.Tokenize(ctx, entity) + if err != nil { + return tokens, err + } + + entity := make([]unique.Handle[tokenize.Token], 0) + for _, t := range tok { + entity = append(entity, unique.Make(t)) + } + + tokens.entities = append(tokens.entities, entity) + } + + // Texts + tokens.texts = make([]*list.List, 0, len(s.Texts)) + for _, text := range s.Texts { + select { + case <-ctx.Done(): + return tokens, ctx.Err() + default: + } + + tok, err := tokenizer.Tokenize(ctx, text) + if err != nil { + return tokens, err + } + + l := list.New() + for _, t := range tok { + l.PushBack(unique.Make(t)) + } + + tokens.texts = append(tokens.texts, l) + } + + return tokens, nil +} diff --git a/tokens_distance.go b/tokens_distance.go new file mode 100644 index 0000000..be7449d --- /dev/null +++ b/tokens_distance.go @@ -0,0 +1,12 @@ +package assocentity + +import ( + "github.com/ndabAP/assocentity/v15/tokenize" +) + +func (tokens Tokens) Distance() map[tokenize.Token]int { + for _, text := range tokens.texts { + for el := text.Front(); el != nil; el = el.Next() { + } + } +}