Persist question and key between runs. IsPredictable returns reasoning #50

Merged · 2 commits · Mar 11, 2024

In short: the Streamlit app now seeds its form fields from st.session_state so the question and OpenAI API key persist between runs; is_predictable asks the model for a JSON object and returns an (is_predictable, reasoning) tuple instead of scanning the completion for "yes"/"no"; and every gpt-4-1106-preview reference is bumped to gpt-4-0125-preview.
22 changes: 17 additions & 5 deletions evo_researcher/app.py
@@ -32,18 +32,30 @@ def log(self, msg: str) -> None:
 st.title("Evo Predict")
 
 with st.form("question_form", clear_on_submit=True):
-    question = st.text_input('Question', placeholder="Will Twitter implement a new misinformation policy before the end of 2024")
-    openai_api_key = st.text_input('OpenAI API Key', placeholder="sk-...", type="password")
+    question = st.text_input(
+        'Question',
+        placeholder="Will Twitter implement a new misinformation policy before the end of 2024",
+        value=st.session_state.get('question', '')
+    )
+    openai_api_key = st.text_input(
+        'OpenAI API Key',
+        placeholder="sk-...",
+        type="password",
+        value=st.session_state.get('openai_api_key', '')
+    )
     submit_button = st.form_submit_button('Predict')
 
 if submit_button and question and openai_api_key:
+    st.session_state['openai_api_key'] = openai_api_key
+    st.session_state['question'] = question
+
     with st.container():
         with st.spinner("Evaluating question..."):
-            is_predictable = evaluate_if_predictable(question=question, api_key=openai_api_key)
+            (is_predictable, reasoning) = evaluate_if_predictable(question=question, api_key=openai_api_key)
 
     st.container(border=True).markdown(f"""### Question evaluation\n\nQuestion: **{question}**\n\nIs predictable: `{is_predictable}`""")
     if not is_predictable:
-        st.container().error("The agent thinks this question is not predictable.")
+        st.container().error(f"The agent thinks this question is not predictable: \n\n{reasoning}")
         st.stop()
 
     with st.spinner("Researching..."):
@@ -65,7 +77,7 @@ def log(self, msg: str) -> None:
 
     with st.spinner("Predicting..."):
         with st.container(border=True):
-            prediction = _make_prediction(market_question=question, additional_information=report, engine="gpt-4-1106-preview", temperature=0.0, api_key=openai_api_key)
+            prediction = _make_prediction(market_question=question, additional_information=report, engine="gpt-4-0125-preview", temperature=0.0, api_key=openai_api_key)
         with st.container().expander("Show agent's prediction", expanded=False):
             if prediction.outcome_prediction == None:
                 st.container().error("The agent failed to generate a prediction")
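The persistence change relies on a Streamlit detail worth spelling out: with clear_on_submit=True, widgets inside a form reset to their default value after submission, and here the default is re-read from st.session_state on every rerun. Writing the submitted inputs back into st.session_state therefore makes them reappear after the form clears. A minimal standalone sketch of the pattern (a hypothetical toy app, not code from this PR):

import streamlit as st

with st.form("question_form", clear_on_submit=True):
    # The widget's default is re-read from session state on every rerun,
    # so after clear_on_submit resets it, the persisted value comes back.
    question = st.text_input(
        "Question",
        value=st.session_state.get("question", ""),
    )
    submitted = st.form_submit_button("Predict")

if submitted and question:
    # Stash the raw input so it survives subsequent reruns.
    st.session_state["question"] = question
    st.write(f"Submitted: {question}")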
12 changes: 8 additions & 4 deletions evo_researcher/benchmark/agents.py
@@ -107,10 +107,12 @@ def __init__(
         self.embedding_model = embedding_model
 
     def is_predictable(self, market_question: str) -> bool:
-        return is_predictable(question=market_question)
+        (result, _) = is_predictable(question=market_question)
+        return result
 
     def is_predictable_restricted(self, market_question: str, time_restriction_up_to: datetime) -> bool:
-        return is_predictable(question=market_question)
+        (result, _) = is_predictable(question=market_question)
+        return result
 
     def research(self, market_question: str) -> str:
         return research_autonolas(
@@ -164,10 +166,12 @@ def __init__(
         self.use_tavily_raw_content = use_tavily_raw_content
 
     def is_predictable(self, market_question: str) -> bool:
-        return is_predictable(question=market_question)
+        (result, _) = is_predictable(question=market_question)
+        return result
 
     def is_predictable_restricted(self, market_question: str, time_restriction_up_to: datetime) -> bool:
-        return is_predictable(question=market_question)
+        (result, _) = is_predictable(question=market_question)
+        return result
 
     def predict(self, market_question: str) -> Prediction:
         try:
29 changes: 17 additions & 12 deletions evo_researcher/functions/evaluate_question.py
@@ -1,4 +1,6 @@
 import json
+import os
+from evo_researcher.autonolas.research import clean_completion_json
 from langchain_openai import ChatOpenAI
 from langchain.prompts import ChatPromptTemplate
 from evo_researcher.functions.cache import persistent_inmemory_cache
@@ -20,35 +22,38 @@
 
 Then, write down what is the future event of the question, what it reffers to and when that event will happen if the question contains it.
 
-Then, give your final decision, write either "yes" or "no" about whether the question is answerable.
+Then, give your final decision about whether the question is answerable.
+
+Return a JSON object with the following structure:
+
+{{
+    "is_predictable": bool,
+    "reasoning": string
+}}
+
+Output only the JSON object in your response. Do not include any other contents in your response.
 """
 
 
 @persistent_inmemory_cache
 def is_predictable(
     question: str,
-    engine: str = "gpt-4-1106-preview",
+    engine: str = "gpt-4-0125-preview",
     prompt_template: str = QUESTION_EVALUATE_PROMPT,
     api_key: str | None = None
-) -> bool:
+) -> tuple[bool, str]:
     """
     Evaluate if the question is actually answerable.
     """
+    if api_key == None:
+        api_key = os.environ.get("OPENAI_API_KEY", "")
+
     llm = ChatOpenAI(model=engine, temperature=0.0, api_key=api_key)
 
     prompt = ChatPromptTemplate.from_template(template=prompt_template)
     messages = prompt.format_messages(question=question)
     completion = llm(messages, max_tokens=256).content
+    response = json.loads(clean_completion_json(completion))
 
-    if "yes" in completion.lower():
-        is_predictable = True
-    elif "no" in completion.lower():
-        is_predictable = False
-    else:
-        raise ValueError(f"Error in evaluate_question for `{question}`: {completion}")
-
-    return is_predictable
+    return (response["is_predictable"], response["reasoning"])
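With the rewrite above, the model is instructed to emit only the JSON object, clean_completion_json (defined in evo_researcher.autonolas.research, not shown in this diff) normalizes the completion before json.loads, and callers unpack a (bool, str) tuple instead of receiving a bare bool. A usage sketch of the new signature, reusing the app's placeholder question:

from evo_researcher.functions.evaluate_question import is_predictable

# Expected raw completion, per the updated prompt:
#   {"is_predictable": false, "reasoning": "..."}
predictable, reasoning = is_predictable(
    question="Will Twitter implement a new misinformation policy before the end of 2024?"
)
if not predictable:
    print(f"Not predictable: {reasoning}")

Note that is_predictable is wrapped in @persistent_inmemory_cache, so a repeated call with the same arguments returns the cached tuple rather than hitting the API again.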
2 changes: 1 addition & 1 deletion evo_researcher/functions/grade_info.py
@@ -70,7 +70,7 @@ def grade_info(question: str, information: str) -> str:
     planning_prompt = ChatPromptTemplate.from_template(template=grading_planning_prompt_template)
     formatting_prompt = ChatPromptTemplate.from_template(template=grading_format_prompt_template)
 
-    llm = ChatOpenAI(model="gpt-4-1106-preview", temperature=0)
+    llm = ChatOpenAI(model="gpt-4-0125-preview", temperature=0)
 
     planning_chain = (
         planning_prompt |
2 changes: 1 addition & 1 deletion evo_researcher/functions/rerank_results.py
@@ -19,7 +19,7 @@ def rerank_results(results: list[str], goal: str) -> list[str]:
 
     rerank_results_chain = (
         rerank_results_prompt |
-        ChatOpenAI(model="gpt-4-1106-preview") |
+        ChatOpenAI(model="gpt-4-0125-preview") |
         CommaSeparatedListOutputParser()
     )
2 changes: 1 addition & 1 deletion evo_researcher/functions/research.py
@@ -12,7 +12,7 @@
 def research(
     goal: str,
     use_summaries: bool,
-    model: str = "gpt-4-1106-preview",
+    model: str = "gpt-4-0125-preview",
     initial_subqueries_limit: int = 20,
     subqueries_limit: int = 4,
     scrape_content_split_chunk_size: int = 800,
6 changes: 3 additions & 3 deletions evo_researcher/main.py
@@ -34,7 +34,7 @@ def research(
     start = time.time()
 
     with get_openai_callback() as cb:
-        report = evo_research(goal=prompt, use_summaries=False, model="gpt-4-1106-preview")
+        report = evo_research(goal=prompt, use_summaries=False, model="gpt-4-0125-preview")
 
     end = time.time()
 
@@ -58,9 +58,9 @@ def predict(prompt: str, path: str | None = None) -> None:
     if path:
         report = read_text_file(path)
     else:
-        report = evo_research(goal=prompt, model="gpt-4-1106-preview", use_summaries=False)
+        report = evo_research(goal=prompt, model="gpt-4-0125-preview", use_summaries=False)
 
-    prediction = _make_prediction(market_question=prompt, additional_information=report, engine="gpt-4-1106-preview", temperature=0.0)
+    prediction = _make_prediction(market_question=prompt, additional_information=report, engine="gpt-4-0125-preview", temperature=0.0)
 
     end = time.time()
2 changes: 1 addition & 1 deletion scripts/benchmark.py
@@ -83,7 +83,7 @@ def main(
             agent_name="evo_gpt-3.5-turbo-0125_tavilyrawcontent",
             use_tavily_raw_content=True,
         ),
-        # EvoAgent(model="gpt-4-1106-preview", max_workers=max_workers, agent_name="evo_gpt-4-1106-preview"), # Too expensive to be enabled by default.
+        # EvoAgent(model="gpt-4-0125-preview", max_workers=max_workers, agent_name="evo_gpt-4-0125-preview"), # Too expensive to be enabled by default.
     ],
     cache_path=cache_path,
     only_cached=only_cached,
3 changes: 2 additions & 1 deletion tests/test_evaluate_question.py
@@ -11,4 +11,5 @@
     ("Did COVID-19 come from a laboratory?", False),
 ])
 def test_evaluate_question(question: str, answerable: bool) -> None:
-    assert is_predictable(question=question) == answerable, f"Question is not evaluated correctly, see the completion: {is_predictable}"
+    (result, _) = is_predictable(question=question)
+    assert result == answerable, f"Question is not evaluated correctly, see the completion: {is_predictable}"