Skip to content

Commit

Permalink
Move embeddings function to config
Browse files Browse the repository at this point in the history
  • Loading branch information
natoverse committed Jan 15, 2025
1 parent 0e791e4 commit 8064244
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 28 deletions.
2 changes: 1 addition & 1 deletion graphrag/api/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

from graphrag.config.embeddings import (
community_full_content_embedding,
create_collection_name,
entity_description_embedding,
text_unit_text_embedding,
)
Expand All @@ -47,7 +48,6 @@
read_indexer_text_units,
)
from graphrag.utils.cli import redact
from graphrag.utils.embeddings import create_collection_name
from graphrag.vector_stores.base import BaseVectorStore
from graphrag.vector_stores.factory import VectorStoreFactory

Expand Down
19 changes: 19 additions & 0 deletions graphrag/config/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,22 @@ def get_embedding_settings(
return {
"strategy": strategy,
}


def create_collection_name(
    container_name: str, embedding_name: str, validate: bool = True
) -> str:
    """
    Build the vector-store collection name for one set of embeddings.

    A single vector store may hold several independent sets of embeddings
    (one per project). `container_name` identifies that partition and is
    prepended to `embedding_name`, joined by a dash, so collections from
    different projects do not collide.

    Embedding names use dot notation, which many vector stores do not
    support, so every "." in the combined name (container prefix included)
    is converted to "-".

    Args:
        container_name: Prefix that partitions collections within the store.
        embedding_name: Name of the embedding; checked against
            `all_embeddings` when `validate` is true.
        validate: When false, skip the membership check entirely.

    Returns
    -------
        The dash-separated collection name with all dots replaced by dashes.

    Raises
    ------
        KeyError: If `validate` is true and `embedding_name` is not in
            `all_embeddings`.
    """
    # The check on `validate` comes first so that the known-embeddings
    # lookup is skipped altogether when validation is disabled.
    if not validate or embedding_name in all_embeddings:
        dotted_name = f"{container_name}-{embedding_name}"
        return dotted_name.replace(".", "-")

    raise KeyError(f"Invalid embedding name: {embedding_name}")
2 changes: 1 addition & 1 deletion graphrag/index/operations/embed_text/embed_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@

from graphrag.cache.pipeline_cache import PipelineCache
from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
from graphrag.config.embeddings import create_collection_name
from graphrag.index.operations.embed_text.strategies.typing import TextEmbeddingStrategy
from graphrag.utils.embeddings import create_collection_name
from graphrag.vector_stores.base import BaseVectorStore, VectorStoreDocument
from graphrag.vector_stores.factory import VectorStoreFactory

Expand Down
25 changes: 0 additions & 25 deletions graphrag/utils/embeddings.py

This file was deleted.

2 changes: 1 addition & 1 deletion tests/unit/utils/test_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest

from graphrag.utils.embeddings import create_collection_name
from graphrag.config.embeddings import create_collection_name


def test_create_collection_name():
Expand Down

0 comments on commit 8064244

Please sign in to comment.