Skip to content

Commit

Permalink
Adding AutoFuzzyJoin Algorithm for value mapping
Browse files Browse the repository at this point in the history
  • Loading branch information
EduardoPena committed Jun 4, 2024
1 parent c85a9a4 commit 9fa32ba
Showing 1 changed file with 19 additions and 10 deletions.
29 changes: 19 additions & 10 deletions bdikit/mapping_algorithms/value_mapping/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@


class BaseAlgorithm:

def __init__(self, *args):
pass

Expand Down Expand Up @@ -51,6 +52,7 @@ def match(self, current_values, target_values, threshold=0.8):


class EmbeddingAlgorithm(BaseAlgorithm):

def __init__(self, model_path="bert-base-multilingual-cased"):
embeddings = TransformerWordEmbeddings(model_path)
method = Embeddings(embeddings, min_similarity=0, model_id="embedding_model")
Expand Down Expand Up @@ -110,25 +112,32 @@ def __init__(self):
pass

def match(self, current_values, target_values, threshold=0.8):
    """Match source values to target values using AutoFuzzyJoin (AutoFJ).

    Both inputs are de-duplicated and sorted, wrapped in two-column
    DataFrames (``id``, ``title``) and joined with ``AutoFJ``. Every joined
    pair is then re-scored with ``ratio`` and kept only when that score
    reaches ``threshold``.

    Args:
        current_values: Iterable of hashable, mutually sortable source values.
        target_values: Iterable of hashable, mutually sortable target values.
        threshold: Used twice: as AutoFJ's ``precision_target`` and as the
            minimum ``ratio`` score a joined pair needs to be returned.

    Returns:
        A list of ``(current_value, target_value, similarity)`` tuples. The
        list is empty when either input is empty, and may be empty or
        partial when AutoFJ fails (matching is best-effort and never raises
        for join errors).
    """
    # Local import: the file's top-level import block is not visible here.
    import logging

    current_values = sorted(set(current_values))
    target_values = sorted(set(target_values))

    matches = []
    # Nothing can be joined against an empty side; skip the AutoFJ round
    # trip instead of relying on the exception handler below.
    if not current_values or not target_values:
        return matches

    # AutoFJ is given an explicit 1-based ``id`` column for each table.
    df_curr_values = pd.DataFrame(
        {"id": range(1, len(current_values) + 1), "title": current_values}
    )
    df_target_values = pd.DataFrame(
        {"id": range(1, len(target_values) + 1), "title": target_values}
    )

    try:
        autofj = AutoFJ(
            precision_target=threshold,
            join_function_space="autofj_md",
            verbose=True,
        )
        joined = autofj.join(df_curr_values, df_target_values, id_column="id")
        if len(joined) > 0:
            # The join result exposes the matched titles as title_l / title_r.
            for title_l, title_r in zip(joined["title_l"], joined["title_r"]):
                # NOTE(review): ``ratio`` is imported elsewhere in the file;
                # presumably a normalized string-similarity score -- confirm.
                similarity = ratio(title_l, title_r)
                if similarity >= threshold:
                    matches.append((title_l, title_r, similarity))
    except Exception:
        # Deliberately best-effort: keep whatever was collected, but record
        # the failure instead of swallowing it silently.
        logging.getLogger(__name__).warning(
            "AutoFJ value matching failed; returning %d match(es) found so far",
            len(matches),
            exc_info=True,
        )
    return matches

0 comments on commit 9fa32ba

Please sign in to comment.