
Commit c954079
Merge pull request #50 from polywrap/namesty/app-tweaks
Persist the question and API key between runs; is_predictable now also returns its reasoning.
namesty authored Mar 11, 2024
2 parents 158a69b + 4f6ffdb commit c954079
Showing 9 changed files with 51 additions and 29 deletions.
22 changes: 17 additions & 5 deletions evo_researcher/app.py
@@ -32,18 +32,30 @@ def log(self, msg: str) -> None:
 st.title("Evo Predict")

 with st.form("question_form", clear_on_submit=True):
-    question = st.text_input('Question', placeholder="Will Twitter implement a new misinformation policy before the end of 2024")
-    openai_api_key = st.text_input('OpenAI API Key', placeholder="sk-...", type="password")
+    question = st.text_input(
+        'Question',
+        placeholder="Will Twitter implement a new misinformation policy before the end of 2024",
+        value=st.session_state.get('question', '')
+    )
+    openai_api_key = st.text_input(
+        'OpenAI API Key',
+        placeholder="sk-...",
+        type="password",
+        value=st.session_state.get('openai_api_key', '')
+    )
     submit_button = st.form_submit_button('Predict')

 if submit_button and question and openai_api_key:
+    st.session_state['openai_api_key'] = openai_api_key
+    st.session_state['question'] = question
+
     with st.container():
         with st.spinner("Evaluating question..."):
-            is_predictable = evaluate_if_predictable(question=question, api_key=openai_api_key)
+            (is_predictable, reasoning) = evaluate_if_predictable(question=question, api_key=openai_api_key)

             st.container(border=True).markdown(f"""### Question evaluation\n\nQuestion: **{question}**\n\nIs predictable: `{is_predictable}`""")
             if not is_predictable:
-                st.container().error("The agent thinks this question is not predictable.")
+                st.container().error(f"The agent thinks this question is not predictable: \n\n{reasoning}")
                 st.stop()

         with st.spinner("Researching..."):
@@ -65,7 +77,7 @@ def log(self, msg: str) -> None:

with st.spinner("Predicting..."):
with st.container(border=True):
prediction = _make_prediction(market_question=question, additional_information=report, engine="gpt-4-1106-preview", temperature=0.0, api_key=openai_api_key)
prediction = _make_prediction(market_question=question, additional_information=report, engine="gpt-4-0125-preview", temperature=0.0, api_key=openai_api_key)
with st.container().expander("Show agent's prediction", expanded=False):
if prediction.outcome_prediction == None:
st.container().error("The agent failed to generate a prediction")
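For context, the persistence change above relies on Streamlit rerunning the whole script on every interaction while st.session_state survives reruns. A minimal standalone sketch of the same pattern (the widget and key names here are illustrative, not from this repo):

import streamlit as st

# st.session_state outlives each script rerun, so writing the submitted
# value back and reading it via value=... repopulates the widget next run.
with st.form("demo_form", clear_on_submit=True):
    name = st.text_input("Name", value=st.session_state.get("name", ""))
    submitted = st.form_submit_button("Save")

if submitted and name:
    st.session_state["name"] = name  # persisted across reruns

st.write("Stored name:", st.session_state.get("name", ""))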
12 changes: 8 additions & 4 deletions evo_researcher/benchmark/agents.py
@@ -107,10 +107,12 @@ def __init__(
         self.embedding_model = embedding_model

     def is_predictable(self, market_question: str) -> bool:
-        return is_predictable(question=market_question)
+        (result, _) = is_predictable(question=market_question)
+        return result

     def is_predictable_restricted(self, market_question: str, time_restriction_up_to: datetime) -> bool:
-        return is_predictable(question=market_question)
+        (result, _) = is_predictable(question=market_question)
+        return result

     def research(self, market_question: str) -> str:
         return research_autonolas(
@@ -164,10 +166,12 @@ def __init__(
         self.use_tavily_raw_content = use_tavily_raw_content

     def is_predictable(self, market_question: str) -> bool:
-        return is_predictable(question=market_question)
+        (result, _) = is_predictable(question=market_question)
+        return result

     def is_predictable_restricted(self, market_question: str, time_restriction_up_to: datetime) -> bool:
-        return is_predictable(question=market_question)
+        (result, _) = is_predictable(question=market_question)
+        return result

     def predict(self, market_question: str) -> Prediction:
         try:
29 changes: 17 additions & 12 deletions evo_researcher/functions/evaluate_question.py
@@ -1,4 +1,6 @@
+import json
 import os
+from evo_researcher.autonolas.research import clean_completion_json
 from langchain_openai import ChatOpenAI
 from langchain.prompts import ChatPromptTemplate
 from evo_researcher.functions.cache import persistent_inmemory_cache
@@ -20,35 +22,38 @@
 Then, write down what is the future event of the question, what it reffers to and when that event will happen if the question contains it.
-Then, give your final decision, write either "yes" or "no" about whether the question is answerable.
+Then, give your final decision about whether the question is answerable.
+Return a JSON object with the following structure:
+{{
+    "is_predictable": bool,
+    "reasoning": string
+}}
+Output only the JSON object in your response. Do not include any other contents in your response.
 """


 @persistent_inmemory_cache
 def is_predictable(
     question: str,
-    engine: str = "gpt-4-1106-preview",
+    engine: str = "gpt-4-0125-preview",
     prompt_template: str = QUESTION_EVALUATE_PROMPT,
     api_key: str | None = None
-) -> bool:
+) -> tuple[bool, str]:
     """
     Evaluate if the question is actually answerable.
     """

+    if api_key == None:
+        api_key = os.environ.get("OPENAI_API_KEY", "")
+
     llm = ChatOpenAI(model=engine, temperature=0.0, api_key=api_key)

     prompt = ChatPromptTemplate.from_template(template=prompt_template)
     messages = prompt.format_messages(question=question)
     completion = llm(messages, max_tokens=256).content
+    response = json.loads(clean_completion_json(completion))

-    if "yes" in completion.lower():
-        is_predictable = True
-    elif "no" in completion.lower():
-        is_predictable = False
-    else:
-        raise ValueError(f"Error in evaluate_question for `{question}`: {completion}")
-
-    return is_predictable
+    return (response["is_predictable"], response["reasoning"])
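The new contract asks the model for a JSON object and parses it with clean_completion_json from evo_researcher.autonolas.research. That helper's implementation isn't shown in this diff; as a hedged sketch, such a function typically strips the markdown code fences models often wrap around JSON before handing the payload to json.loads:

import json

def clean_completion_json_sketch(completion: str) -> str:
    # Hypothetical stand-in for clean_completion_json (real implementation
    # not shown in this diff): drop leading/trailing ``` fences, e.g.
    # ```json ... ```, so the remaining payload parses as plain JSON.
    text = completion.strip()
    if text.startswith("```"):
        text = text.split("\n", 1)[1] if "\n" in text else ""
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()

raw = '```json\n{"is_predictable": true, "reasoning": "Clear resolution date."}\n```'
response = json.loads(clean_completion_json_sketch(raw))
assert response["is_predictable"] is True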
2 changes: 1 addition & 1 deletion evo_researcher/functions/grade_info.py
@@ -70,7 +70,7 @@ def grade_info(question: str, information: str) -> str:
     planning_prompt = ChatPromptTemplate.from_template(template=grading_planning_prompt_template)
     formatting_prompt = ChatPromptTemplate.from_template(template=grading_format_prompt_template)

-    llm = ChatOpenAI(model="gpt-4-1106-preview", temperature=0)
+    llm = ChatOpenAI(model="gpt-4-0125-preview", temperature=0)

     planning_chain = (
         planning_prompt |
2 changes: 1 addition & 1 deletion evo_researcher/functions/rerank_results.py
@@ -19,7 +19,7 @@ def rerank_results(results: list[str], goal: str) -> list[str]:

     rerank_results_chain = (
         rerank_results_prompt |
-        ChatOpenAI(model="gpt-4-1106-preview") |
+        ChatOpenAI(model="gpt-4-0125-preview") |
         CommaSeparatedListOutputParser()
     )

2 changes: 1 addition & 1 deletion evo_researcher/functions/research.py
@@ -12,7 +12,7 @@
 def research(
     goal: str,
     use_summaries: bool,
-    model: str = "gpt-4-1106-preview",
+    model: str = "gpt-4-0125-preview",
     initial_subqueries_limit: int = 20,
     subqueries_limit: int = 4,
     scrape_content_split_chunk_size: int = 800,
6 changes: 3 additions & 3 deletions evo_researcher/main.py
@@ -34,7 +34,7 @@ def research(
     start = time.time()

     with get_openai_callback() as cb:
-        report = evo_research(goal=prompt, use_summaries=False, model="gpt-4-1106-preview")
+        report = evo_research(goal=prompt, use_summaries=False, model="gpt-4-0125-preview")

     end = time.time()

@@ -58,9 +58,9 @@ def predict(prompt: str, path: str | None = None) -> None:
     if path:
         report = read_text_file(path)
     else:
-        report = evo_research(goal=prompt, model="gpt-4-1106-preview", use_summaries=False)
+        report = evo_research(goal=prompt, model="gpt-4-0125-preview", use_summaries=False)

-    prediction = _make_prediction(market_question=prompt, additional_information=report, engine="gpt-4-1106-preview", temperature=0.0)
+    prediction = _make_prediction(market_question=prompt, additional_information=report, engine="gpt-4-0125-preview", temperature=0.0)

     end = time.time()

2 changes: 1 addition & 1 deletion scripts/benchmark.py
@@ -83,7 +83,7 @@ def main(
             agent_name="evo_gpt-3.5-turbo-0125_tavilyrawcontent",
             use_tavily_raw_content=True,
         ),
-        # EvoAgent(model="gpt-4-1106-preview", max_workers=max_workers, agent_name="evo_gpt-4-1106-preview"), # Too expensive to be enabled by default.
+        # EvoAgent(model="gpt-4-0125-preview", max_workers=max_workers, agent_name="evo_gpt-4-0125-preview"), # Too expensive to be enabled by default.
     ],
     cache_path=cache_path,
     only_cached=only_cached,
3 changes: 2 additions & 1 deletion tests/test_evaluate_question.py
@@ -11,4 +11,5 @@
("Did COVID-19 come from a laboratory?", False),
])
def test_evaluate_question(question: str, answerable: bool) -> None:
assert is_predictable(question=question) == answerable, f"Question is not evaluated correctly, see the completion: {is_predictable}"
(result, _) = is_predictable(question=question)
assert result == answerable, f"Question is not evaluated correctly, see the completion: {is_predictable}"
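From a caller's perspective, the change means unpacking a (bool, str) tuple instead of a bare bool. A short usage sketch (the question below is an example, not from the test suite):

from evo_researcher.functions.evaluate_question import is_predictable

predictable, reasoning = is_predictable(question="Will the Paris 2024 Olympics open on July 26, 2024?")
if not predictable:
    print(f"Question rejected: {reasoning}")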
