From 5957e093e57f64bcaed9e038f4c30444fac25922 Mon Sep 17 00:00:00 2001 From: shane Date: Tue, 17 Sep 2024 20:35:49 -0400 Subject: [PATCH] alloy_changes w_out ipynb --- bolift/aqfxns.py | 12 ++++++++---- bolift/asktell.py | 16 ++++++++++++---- bolift/pool.py | 5 ++--- bolift/tool.py | 18 +++++++++--------- 4 files changed, 31 insertions(+), 20 deletions(-) diff --git a/bolift/aqfxns.py b/bolift/aqfxns.py index e7e2662..b2c1da9 100644 --- a/bolift/aqfxns.py +++ b/bolift/aqfxns.py @@ -80,13 +80,15 @@ def greedy_d(probs, values, best): def expected_improvement_g(mean, std, best): """Expected improvement for the given Gaussian distribution""" - z = (mean - best) / std + eps=1e-15 + z = (mean - best) / (std+eps) ei = (mean - best) * norm.cdf(z) + std * norm.pdf(z) return ei def log_expected_improvement_g(mean, std, best): """Log Expected improvement for the given Gaussian distribution""" - z = (mean - best) / std + eps=1e-15 + z = (mean - best) / (std+eps) # ei = std * h(z) # ei = std * (norm.pdf(z) + z * norm.cdf(z)) log_ei = np.log(std) + np.log((norm.pdf(z) + z * norm.cdf(z))) @@ -94,14 +96,16 @@ def log_expected_improvement_g(mean, std, best): def probability_of_improvement_g(mean, std, best): """Probability of improvement for the given Gaussian distribution""" - z = (mean - best) / std + eps=1e-15 + z = (mean - best) / (std+eps) pi = norm.cdf(z) return pi def upper_confidence_bound_g(mean, std, best, _lambda): """Upper confidence bound for the given Gaussian distribution""" - return mean + _lambda * std + eps=1e-15 + return mean + _lambda * (std+eps) def greedy_g(mean, std, best): diff --git a/bolift/asktell.py b/bolift/asktell.py index f6c117b..777f6e4 100644 --- a/bolift/asktell.py +++ b/bolift/asktell.py @@ -145,7 +145,7 @@ def __init__( self._answer_choices = _answer_choices[:k] self.use_quantiles = use_quantiles self.n_quantiles = n_quantiles - self._calibration_factor = None + self._calibration_factor = 1.0 self._verbose = verbose self.tokens_used = 0 self.cos_sim = cos_sim @@ -309,6 +309,7 @@ def ask( aug_random_filter: Add this man y random examples to the pool to increase diversity after reducing pool with inverse model _lambda: Lambda value to use for UCB lambda_mult: control MMR diversity ,0-1 lower = more diverse + Return: The selected x values, their acquisition function values, and the predicted y modes. Sorted by acquisition function value (descending) @@ -323,6 +324,7 @@ def ask( init_pnt, [0] * k, [0] * k, + [0] * k, ) if aq_fxn == "probability_of_improvement": @@ -340,6 +342,8 @@ def ask( possible_x.sample(k), [0] * k, [0] * k, + [0] * k, + ) else: raise ValueError(f"Unknown acquisition function: {aq_fxn}") @@ -352,7 +356,7 @@ def ask( if inv_filter+aug_random_filter < len(possible_x): possible_x_l = [] if inv_filter: - approx_x = self.inv_predict(best * np.random.normal(1.2, 0.05), system_message=inv_system_message) + approx_x = self.inv_predict(best + np.abs(best)*np.random.normal(0.2, 0.05), system_message=inv_system_message) possible_x_l.extend(possible_x.approx_sample(approx_x, inv_filter, lambda_mult=lambda_mult)) if aug_random_filter: @@ -367,6 +371,8 @@ def ask( possible_x.sample(k), [0] * k, [0] * k, + [0] * k, + [0] * k ) return results @@ -456,7 +462,7 @@ def _setup_prompt( def _setup_inverse_prompt(self, example: Dict): prompt_template = PromptTemplate( input_variables=["x", "y", "y_name", "x_name"], - template="If {y_name} is {y}, then {x_name} is @@@\n{x}###", + template="If {y_name} is {y}, then the {x_name} is @@@\n{x}###", ) if example is not None: prompt_template.format(**example) @@ -468,7 +474,7 @@ def _setup_inverse_prompt(self, example: Dict): if len(examples) == 0: raise ValueError("Cannot do zero-shot with selector") - sim_selector = SemanticSimilarityExampleSelector if self.cos_sim else MaxMarginalRelevanceExampleSelector #LabelSimilarityExampleSelector + sim_selector = SemanticSimilarityExampleSelector if self.cos_sim else MaxMarginalRelevanceExampleSelector # LabelSimilarityExampleSelector example_selector = sim_selector.from_examples( [example], OpenAIEmbeddings(), @@ -541,11 +547,13 @@ def _ask( aq_vals = [aq_fxn(r, best) for r in results] selected = np.argsort(aq_vals)[::-1][:k] means = [r.mean() for r in results] + stds = [r.std() for r in results] return ( [possible_x[i] for i in selected], [aq_vals[i] for i in selected], [means[i] for i in selected], + [stds[i] for i in selected], ) diff --git a/bolift/pool.py b/bolift/pool.py index b82ccfc..9fe0c9a 100644 --- a/bolift/pool.py +++ b/bolift/pool.py @@ -27,7 +27,7 @@ def __init__(self, pool: List[Any], formatter: Callable = lambda x: str(x)) -> N self.format = formatter self._db = FAISS.from_texts( [formatter(x) for x in pool], - OpenAIEmbeddings(), # model="text-embedding-3-large" + OpenAIEmbeddings(), #model="text-embedding-3-large" metadatas=[dict(data=p) for p in pool], ) @@ -72,5 +72,4 @@ def __str__(self) -> str: return f"Pool of {len(self)} items with {len(self._selected)} selected" def __iter__(self): - return iter(self._available) - + return iter(self._available) \ No newline at end of file diff --git a/bolift/tool.py b/bolift/tool.py index 06f6203..cdbf42b 100644 --- a/bolift/tool.py +++ b/bolift/tool.py @@ -4,16 +4,16 @@ from typing import * import os import pandas as pd +from pydantic import BaseModel - -class BOLiftTool(BaseTool): - name = "Experiment Designer" - description = ("Propose or predict experiments using stateful ask-and-tell Bayes Optimizer. " - "Syntax: Tell {{CSV_FILE}}. Adds training examples to model, {{CSV_FILE}}. No header and only two columns: x in column 0, y in column 1. " - "Ask. Returns optimal experiment to run next. Must call Tell first. " - "Best. Returns predicted experiment. Must call Tell first.") - asktell: AskTellFewShotTopk = None - pool: Pool = None +class BOLiftTool(BaseTool): + name: str = "Experiment Designer" + description: str = ( + "Propose or predict experiments using stateful ask-and-tell Bayes Optimizer. " + "Syntax: Tell {{CSV_FILE}}. Adds training examples to model, {{CSV_FILE}}. No header and only two columns: x in column 0, y in column 1. " + "Ask. Returns optimal experiment to run next. Must call Tell first. " + "Best. Returns predicted experiment. Must call Tell first." + ) def __init__(self, pool: Pool, asktell: Optional[AskTellFewShotTopk] = None, ): # call the parent class constructor