Skip to content

Commit

Permalink
Chore/gleanings any encoding (#1569)
Browse files Browse the repository at this point in the history
* Make claims and entities independent of encoding

* Semver

* Change semver release type
  • Loading branch information
AlonsoGuevara authored Jan 2, 2025
1 parent 2abd6c5 commit 5f9ad0d
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 8 deletions.
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20241230224307150194.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "minor",
"description": "Make gleanings independent of encoding"
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ def __init__(

# Construct the looping arguments
encoding = tiktoken.get_encoding(encoding_model or defs.ENCODING_MODEL)
- yes = f"{encoding.encode('YES')[0]}"
- no = f"{encoding.encode('NO')[0]}"
+ yes = f"{encoding.encode('Y')[0]}"
+ no = f"{encoding.encode('N')[0]}"
self._loop_args = {"logit_bias": {yes: 100, no: 100}, "max_tokens": 1}

async def __call__(
Expand Down Expand Up @@ -195,7 +195,7 @@ async def _process_document(
history=response.history,
model_parameters=self._loop_args,
)
- if response.output.content != "YES":
+ if response.output.content != "Y":
break

return self._parse_claim_tuples(results, prompt_args)
Expand Down
6 changes: 3 additions & 3 deletions graphrag/index/operations/extract_entities/graph_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ def __init__(

# Construct the looping arguments
encoding = tiktoken.get_encoding(encoding_model or defs.ENCODING_MODEL)
- yes = f"{encoding.encode('YES')[0]}"
- no = f"{encoding.encode('NO')[0]}"
+ yes = f"{encoding.encode('Y')[0]}"
+ no = f"{encoding.encode('N')[0]}"
self._loop_args = {"logit_bias": {yes: 100, no: 100}, "max_tokens": 1}

async def __call__(
Expand Down Expand Up @@ -180,7 +180,7 @@ async def _process_document(
model_parameters=self._loop_args,
)

- if response.output.content != "YES":
+ if response.output.content != "Y":
break

return results
Expand Down
2 changes: 1 addition & 1 deletion graphrag/prompts/index/claim_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@


CONTINUE_PROMPT = "MANY entities were missed in the last extraction. Add them below using the same format:\n"
- LOOP_PROMPT = "It appears some entities may have still been missed. Answer YES {tuple_delimiter} NO if there are still entities that need to be added.\n"
+ LOOP_PROMPT = "It appears some entities may have still been missed. Answer Y or N if there are still entities that need to be added.\n"
2 changes: 1 addition & 1 deletion graphrag/prompts/index/entity_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,4 @@
Output:"""

CONTINUE_PROMPT = "MANY entities and relationships were missed in the last extraction. Remember to ONLY emit entities that match any of the previously extracted types. Add them below using the same format:\n"
- LOOP_PROMPT = "It appears some entities and relationships may have still been missed. Answer YES | NO if there are still entities or relationships that need to be added.\n"
+ LOOP_PROMPT = "It appears some entities and relationships may have still been missed. Answer Y or N if there are still entities or relationships that need to be added.\n"

0 comments on commit 5f9ad0d

Please sign in to comment.