-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: scan.py
31 lines (29 loc) · 951 Bytes
/
scan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pandas as pd  # NOTE(review): unused in this chunk — may serve code not shown
import tiktoken
import json  # NOTE(review): unused in this chunk
import numpy as np  # NOTE(review): unused in this chunk
from openai import OpenAI
from dotenv import load_dotenv
# Pull OPENAI_API_KEY (and any other settings) from a local .env file.
load_dotenv()
# Shared OpenAI client; picks up the API key from the environment.
client = OpenAI()
# BPE tokenizer for the cl100k_base vocabulary, used to map tokens <-> ids.
encoder = tiktoken.get_encoding("cl100k_base")
def getLogprobes(prompt, probnum=5, toknum=1):
    """Ask the model to echo *prompt* and return top logprobs for the first token.

    Args:
        prompt: Text the model is instructed to repeat verbatim.
        probnum: Number of top alternatives to request per token position.
        toknum: Maximum number of tokens to generate.

    Returns:
        A list of single-entry dicts mapping token id -> logprob for the first
        output position, or [] when the response carries no usable logprob data.
    """
    res = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
            {"role": "system", "content": "Repeat user message exactly"},
            {"role": "user", "content": prompt},
        ],
        max_tokens=toknum,
        logprobs=True,
        top_logprobs=probnum,
        temperature=0.0,
    )
    try:
        # encode() can yield several ids for an unusual token string; keep the
        # first to preserve the one-id-per-entry shape callers expect.
        return [
            {encoder.encode(r.token)[0]: r.logprob}
            for r in res.choices[0].logprobs.content[0].top_logprobs
        ]
    except (IndexError, AttributeError, TypeError):
        # The API may return logprobs=None or empty content (e.g. when no
        # token is generated). The original caught only IndexError, so a None
        # logprobs object crashed with AttributeError instead of falling back.
        return []
def scan(start=0):
    """Walk the tokenizer vocabulary from position *start*, printing each
    token id mapped to the model's logprobs for echoing that token.

    NOTE(review): relies on tiktoken's private ``_mergeable_ranks`` mapping
    (token bytes -> rank id) — confirm against the installed tiktoken version.
    """
    # Iterate items() directly instead of re-looking up each key in the dict.
    for idx, (tok_bytes, tok_id) in enumerate(encoder._mergeable_ranks.items()):
        if idx < start:
            continue
        token_text = encoder.decode([tok_id])
        print({tok_id: getLogprobes(token_text)})
if __name__ == "__main__":
    # Guard the entry point so importing this module doesn't immediately
    # launch a full-vocabulary scan (one API call per token).
    scan()