generated from SamuelHudec/base_template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimple_max_profit.py
55 lines (42 loc) · 2.41 KB
/
simple_max_profit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import logging
import pandas as pd
from src.config.data import ID_COLUMN
from src.config.ranker import N_RECOMMENDATIONS, RANKING_COLUMNS
from src.config.retriever import N_CANDIDATES
# Module-level logger, registered under the fixed name "re_rank_candidates".
logger: logging.Logger = logging.getLogger(name="re_rank_candidates")
class SimpleMaxProfit:
def __init__(self, candidates: pd.DataFrame, data: pd.DataFrame) -> None:
self.candidates = candidates
self.data = data
self.FALL_BACK = False
def _merge_stats_to_candidates(self) -> pd.DataFrame:
df_to_rank = self.candidates.merge(self.data, left_on="reco_id", right_on=ID_COLUMN, suffixes=("", "_full"))
df_to_rank.drop(f"{ID_COLUMN}_full", axis=1, inplace=True)
# I did inner join check if I join everything
if self.candidates.shape[0] != df_to_rank.shape[0]:
self.FALL_BACK = True
logger.error("Re-ranker, Data didn't merged. Return retriever candidates")
return df_to_rank
def _check_ranks(self, data: pd.DataFrame) -> None:
# check if algo generates unique rank for each item sequence based
all_rank_sum = sum(range(1, data["reco_rank"].max() + 2))
rank_counts = data.groupby(ID_COLUMN)["re_rank"].sum().reset_index()
if sum(rank_counts["re_rank"] == all_rank_sum) != self.data.shape[0]:
self.FALL_BACK = True
logger.error("Re-ranker failed in assigning new rankings. Return retriever candidates")
def forward(self) -> pd.DataFrame:
df_to_rank = self._merge_stats_to_candidates()
# add rank to prices to distinguish between same price by rank this is safe when prices are high like here
df_to_rank["to_rank_by"] = df_to_rank["Price"] + df_to_rank["reco_rank"]
df_to_rank["re_rank"] = df_to_rank.groupby([ID_COLUMN])["to_rank_by"].rank(method="dense", ascending=False)
self._check_ranks(df_to_rank)
# fallback is better to recommend notting
if self.FALL_BACK:
# TODO: find a elegant way, how to fall back
df_to_rank["re_rank"] = df_to_rank["reco_rank"] + 1
logger.info(f"Fallback value {self.FALL_BACK}")
if isinstance(N_RECOMMENDATIONS, int) & (N_RECOMMENDATIONS < N_CANDIDATES):
df_to_rank = df_to_rank.loc[df_to_rank["re_rank"] <= N_RECOMMENDATIONS]
logger.info(f"Re-ranker trim recommendation to {N_RECOMMENDATIONS} items.")
logger.info(f"Re-ranker DONE")
return df_to_rank[RANKING_COLUMNS]