Skip to content

Commit

Permalink
Minor refactor of z-score to use the correct standard deviation
Browse files: browse the repository at this point in the history
  • Loading branch information
Karolina Cynk committed Nov 13, 2023
1 parent 5fc5ed9 commit ee96a43
Showing 1 changed file with 4 additions and 12 deletions.
16 changes: 4 additions & 12 deletions qf_lib/common/utils/miscellaneous/z_score_outliers_cut.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,17 @@
# limitations under the License.

from numpy import log
from qf_lib.common.utils.numberutils.is_finite_number import is_finite_number
from qf_lib.containers.series.qf_series import QFSeries


def z_score_outliers_cut(series: QFSeries) -> QFSeries:
    """
    Compute a log-damped z-score of a series, clipped to the range [-2, 2].

    The values are shifted so that the smallest valid value maps to 1, the
    natural logarithm is taken to compress large outliers, and the result is
    standardised with the mean and standard deviation of the logged values.
    Scores are then limited to [-2, 2].

    Missing (NaN) and infinite entries are ignored when the minimum, mean and
    standard deviation are computed, and they receive a score of 0. A score of
    0 is also returned wherever the z-score is undefined (e.g. a constant or
    single-element series, where the standard deviation is 0 or NaN).

    Parameters
    ----------
    series
        Series of values convertible to float. Assumed to expose the
        pandas-style Series API (``astype``, ``where``, ``clip``, ``fillna``).

    Returns
    -------
    QFSeries
        Series with the same index as ``series``, holding the clipped scores.
    """
    inf = float("inf")

    # An infinite entry would turn the mean (and therefore every score) into
    # NaN, so treat +-inf as missing data; NaN is skipped by min/mean/std.
    values = series.astype(float)
    values = values.where(values.abs() != inf)

    # Shift so the smallest valid value becomes 1: the log is then defined
    # (and non-negative) for every valid entry, including negative inputs.
    shift_value = 1 - values.min()
    log_series = log(values + shift_value)

    # The Series' own std() is used here (for pandas: the sample standard
    # deviation, ddof=1), not numpy's population default.
    result = (log_series - log_series.mean()) / log_series.std()

    # Constrain the limits at +-2.
    result = result.clip(-2, 2)

    # For unknown or invalid values use 0.
    return result.fillna(0.0)

0 comments on commit ee96a43

Please sign in to comment.