Skip to content

Commit

Permalink
feat(populate): allow auto embedding fields
Browse files Browse the repository at this point in the history
  • Loading branch information
ragusa87 authored and SergioMendolia committed Feb 7, 2025
1 parent 2d87452 commit 379e705
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 5 deletions.
48 changes: 47 additions & 1 deletion docs/02_Mapping.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,19 @@ biblioverse_typesense:
facet: true
- name: owners
type: string[]

```

Configuring the mapping involves the following functionalities:
* Your entity class is indexed on every insertion/update/deletion.
* You can automatically populate your entites into typesense.
* You can automatically populate your entities into typesense.
* A service is automatically available to interact with the Typesense API and hydrate the result with your entities.

Limitation:
* Only entities with a single identifier are supported. PRs are welcome to support composite keys.



# Types

Types such as `string` or `string[]` are the same as the Typesense types. You can see the full list of types in the [Typesense documentation](https://typesense.org/docs/0.21.0/api/collections.html#schema-fields).
Expand Down Expand Up @@ -59,3 +61,47 @@ Config
entity_attribute: typesenseAddress
```
# Auto embedding fields
You can use the Typesense auto embedding fields to generate embeddings.
```yaml
embedding:
name: embedding
type: float[]
index: true
embed:
from: ["field_to_embed"]
model_config:
model_name: "..."
api_key: '...'
url: "..."
```
You can refer to the [Typesense](https://typesense.org/docs/27.1/api/vector-search.html#index-embeddings) documentation for the details of the configuration.
Here is an example for a local ollama embedding that would generate embeddings from the "summary" and "tags" fields, with the `nomic-embed-text` model.
- Please note: `numDim` must be specified and must match the number of dimensions produced by your model.
- You need to prefix your model name with `openai/` if you are not using Typesense's own embedding engine.

```yaml
embedding:
name: embedding
type: float[]
index: true
mapped: false
numDim: 768
embed:
from: ["tags", "summary"]
model_config:
model_name: "openai/nomic-embed-text"
api_key: '<key>'
url: "http://localhost:11434/"
```


## Mapping

Unless you want to store embeddings in your database, specify `mapped: false` in the configuration; the embeddings will then only live in Typesense.

In this case, we recommend excluding the embedding field from the fields retrieved when running a query, for example with Typesense's `exclude_fields` search parameter.
17 changes: 17 additions & 0 deletions src/AbstractBiblioverseTypesenseBundle.php
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ private function addCollectionsConfig(ArrayNodeDefinition $arrayNodeDefinition):
->booleanNode('facet')
->defaultNull()
->end()
->booleanNode('mapped')
->defaultTrue()
->end()
->scalarNode('entity_attribute')
->defaultNull()
->end()
Expand Down Expand Up @@ -123,6 +126,20 @@ private function addCollectionsConfig(ArrayNodeDefinition $arrayNodeDefinition):
->scalarNode('vecDist')
->defaultNull()
->end()
->arrayNode('embed')
->children()
->arrayNode('from')
->defaultValue([])->scalarPrototype()->end()
->end()
->arrayNode('model_config')
->children()
->scalarNode('model_name')->isRequired()->end()
->scalarNode('api_key')->defaultNull()->end()
->scalarNode('url')->defaultNull()->end()
->end()
->end()
->end()
->end()
->end()
->end()
->end()
Expand Down
4 changes: 4 additions & 0 deletions src/Mapper/Entity/EntityTransformer.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ public function transform(object $entity): array
$data = [];

foreach ($this->mappingGenerator->getMapping()->getFields() as $fieldMapping) {
if ($fieldMapping->isMapped() === false) {
continue;
}

$fieldName = $fieldMapping->getEntityAttribute() ?? $fieldMapping->getName();
$value = $this->valueExtractor->getValue($entity, $fieldName);

Expand Down
28 changes: 26 additions & 2 deletions src/Mapper/Fields/FieldMapping.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,18 @@
* 'locale'?:string|null,
* 'reference'?:string|null,
* 'entity_attribute'?:string|null,
* 'vecDist'?:string|null
* 'vecDist'?:string|null,
* 'embed'?:null|FieldMappingEmbedArray,
* 'mapped'?:bool
* }
* @phpstan-type FieldMappingEmbedArray array{
* 'from':list<string>,
* 'model_config'?:FieldMappingEmbedModelValueArray,
* }
* @phpstan-type FieldMappingEmbedModelValueArray array{
* 'model_name':string,
* 'api_key'?:string|null,
* 'url'?:string|null
* }
*/
class FieldMapping implements FieldMappingInterface
Expand All @@ -30,6 +41,9 @@ class FieldMapping implements FieldMappingInterface

public ?string $entityAttribute = null;

/**
* @param FieldMappingEmbedArray|null $embed
*/
public function __construct(
public string $name,
DataTypeEnum|string $type,
Expand All @@ -46,6 +60,8 @@ public function __construct(
public ?string $locale = null,
public ?string $reference = null,
public ?string $vecDist = null,
public ?array $embed = null,
public bool $mapped = true,
) {
$this->type = $type instanceof DataTypeEnum ? $type->value : $type;
}
Expand All @@ -60,7 +76,7 @@ public function toArray(): array
'infix' => $this->infix,
'locale' => $this->locale,
'name' => $this->name,
'num_dim' => $this->numDim,
'num_dims' => $this->numDim,
'optional' => $this->optional,
'range_index' => $this->rangeIndex,
'reference' => $this->reference,
Expand All @@ -69,6 +85,7 @@ public function toArray(): array
'store' => $this->store,
'type' => $this->type,
'vec_dist' => $this->vecDist,
'embed' => $this->embed,
], fn ($value) => $value !== null);
}

Expand All @@ -82,6 +99,11 @@ public function getName(): string
return $this->name;
}

/**
 * Tells whether this field's value is read from the entity.
 *
 * When this returns false, EntityTransformer::transform() skips the field,
 * so no value for it is extracted from the entity.
 */
public function isMapped(): bool
{
return $this->mapped;
}

public function getEntityAttribute(): ?string
{
return $this->entityAttribute;
Expand All @@ -108,6 +130,8 @@ public static function fromArray(array $config): self
$config['locale'] ?? null,
$config['reference'] ?? null,
$config['vecDist'] ?? null,
$config['embed'] ?? null,
$config['mapped'] ?? true,
);

$result->entityAttribute = $config['entity_attribute'] ?? null;
Expand Down
2 changes: 2 additions & 0 deletions src/Mapper/Fields/FieldMappingInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,6 @@ public function getName(): string;
public function getEntityAttribute(): ?string;

public function isOptional(): bool;

public function isMapped(): bool;
}
5 changes: 3 additions & 2 deletions src/Query/SearchQuery.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public function __construct(
// After this line, we use alphabetical order for the parameters
?array $stopwords = null,
bool|array|null $prefix = null,
private readonly ?VectorQueryInterface $vectorQuery = null,
private readonly string|VectorQueryInterface|null $vectorQuery = null,
private readonly ?VoiceQueryInterface $voiceQuery = null,
private readonly ?bool $enableHighlightV1 = null,
private readonly ?bool $enableLazyFilter = null,
Expand Down Expand Up @@ -197,7 +197,8 @@ public function toArray(): array
'synonym_prefix' => $this->synonymPrefix,
'text_match_type' => $this->textMatchType,
'typo_tokens_threshold' => $this->typoTokensThreshold,
'vector_query' => $this->vectorQuery?->toArray(),
// A VectorQueryInterface is serialized through toArray(); a plain string (or null) is passed through unchanged.
// The guard must test $vectorQuery itself (not $voiceQuery) so toArray() is only ever called on the object form.
'vector_query' => $this->vectorQuery instanceof VectorQueryInterface ? $this->vectorQuery->toArray() : $this->vectorQuery,
'voice_query' => $this->voiceQuery instanceof VoiceQueryInterface ? (string) $this->voiceQuery : null,
], fn (mixed $value): bool => !is_null($value));
}
Expand Down

0 comments on commit 379e705

Please sign in to comment.