Skip to content

Commit

Permalink
Merge pull request #39 from jbogaardt/development-options
Browse files Browse the repository at this point in the history
Development options
  • Loading branch information
jbogaardt authored Mar 12, 2019
2 parents 5049d9c + 8da0f33 commit 7df5952
Show file tree
Hide file tree
Showing 18 changed files with 167 additions and 27 deletions.
37 changes: 28 additions & 9 deletions chainladder/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,15 @@ def columns(self, value):

@property
def origin(self):
return pd.DatetimeIndex(self.odims, name='origin')
return pd.DatetimeIndex(self.odims, name='origin') \
.to_period(self.origin_grain)

@origin.setter
def origin(self, value):
self._len_check(self.origin, value)
self.odims = pd.Series([value] if type(value) is str else value).values
value = pd.PeriodIndex([item for item in list(value)],
freq=self.origin_grain).to_timestamp()
self.odims = value.values

@property
def development(self):
Expand All @@ -128,7 +131,7 @@ def link_ratio(self):
obj = copy.deepcopy(self)
temp = obj.values.copy()
temp[temp == 0] = np.nan
val_array = obj.valuation.values.reshape(obj.shape[-2:],
val_array = obj.valuation.to_timestamp().values.reshape(obj.shape[-2:],
order='f')[:, 1:]
obj.values = temp[..., 1:]/temp[..., :-1]
obj.ddims = np.array(['{}-{}'.format(obj.ddims[i], obj.ddims[i+1])
Expand All @@ -139,7 +142,7 @@ def link_ratio(self):
obj.odims = obj.odims[:-1]
val_array = val_array[:-1, :]
obj.valuation = pd.DatetimeIndex(
pd.DataFrame(val_array).unstack().values)
pd.DataFrame(val_array).unstack().values).to_period(self._lowest_grain())
return obj

@property
Expand All @@ -159,7 +162,7 @@ def get_latest_diagonal(self, compress=True):
diagonal = np.expand_dims(np.nansum(diagonal, 3), 3)
obj.ddims = ['Latest']
obj.valuation = pd.DatetimeIndex(
[pd.to_datetime(obj.valuation_date)]*len(obj.odims))
[pd.to_datetime(obj.valuation_date)]*len(obj.odims)).to_period(self._lowest_grain())
obj.values = diagonal
return obj

Expand Down Expand Up @@ -252,7 +255,9 @@ def grain(self, grain='', incremental=False, inplace=False):
self.values = self._slide(new_tri, direction='l')
self.values[self.values == 0] = np.nan
self.valuation = self._valuation_triangle()
del self._nan_triangle
if hasattr(self, '_nan_triangle'):
# Force update on _nan_triangle at next access.
del self._nan_triangle
if incremental:
self.cum_to_incr(inplace=True)
return self
Expand Down Expand Up @@ -283,7 +288,7 @@ def trend(self, trend=0.0):
.value/365.25)
else:
trend = (1 + trend)**-(
pd.Series(self.valuation.values -
pd.Series(self.valuation.to_timestamp().values -
np.datetime64(self.valuation_date)).dt.days
.values.reshape(self.shape[-2:], order='f')/365.25)
obj = copy.deepcopy(self)
Expand Down Expand Up @@ -433,6 +438,8 @@ def _validate_arithmetic(self, other):
ddims = set(self.ddims).intersection(set(other.ddims))
odims = set(self.odims).intersection(set(other.odims))
# Need to set string vs int type-casting
odims = pd.PeriodIndex(np.array(list(odims)),
freq=self.origin_grain)
obj = obj[obj.origin.isin(odims)][obj.development.isin(ddims)]
other = other[other.origin.isin(odims)][other.development.isin(ddims)]
obj.odims = np.sort(np.array(list(odims)))
Expand Down Expand Up @@ -712,7 +719,7 @@ def nan_triangle(self):
hasattr(self, '_nan_triangle'):
self.valuation = self._valuation_triangle()
val_array = self.valuation
val_array = val_array.values.reshape(self.shape[-2:], order='f')
val_array = val_array.to_timestamp().values.reshape(self.shape[-2:], order='f')
nan_triangle = np.array(
pd.DataFrame(val_array) > self.valuation_date)
nan_triangle = np.where(nan_triangle, np.nan, 1)
Expand All @@ -736,6 +743,8 @@ def _valuation_triangle(self, ddims=None):
origin = pd.PeriodIndex(self.odims, freq=self.origin_grain) \
.to_timestamp(how='s')
origin = pd.Series(origin)
if type(self.valuation_date) is not pd.Timestamp:
self.valuation_date = self.valuation_date.to_timestamp()
# Limit origin to valuation date
origin[origin > self.valuation_date] = self.valuation_date
next_development = origin+pd.DateOffset(days=-1, months=ddims[0])
Expand All @@ -750,7 +759,16 @@ def _valuation_triangle(self, ddims=None):
next_development = np.expand_dims(
np.array(origin+pd.DateOffset(days=-1, months=item)), -1)
val_array = np.concatenate((val_array, next_development), -1)
return pd.DatetimeIndex(pd.DataFrame(val_array).unstack().values)
val_array = pd.DatetimeIndex(pd.DataFrame(val_array).unstack().values)
return val_array.to_period(self._lowest_grain())

def _lowest_grain(self):
my_list = ['M', 'Q', 'Y']
my_dict = {item: num for num, item in enumerate(my_list)}
lowest_grain = my_list[min(my_dict[self.origin_grain],
my_dict[self.development_grain])]
return lowest_grain


def _slide(self, triangle, direction='r'):
''' Facilitates swapping alignment of triangle between development
Expand Down Expand Up @@ -803,6 +821,7 @@ def to_datetime(data, fields, period_end=False):
target = target_field.map(arr)
if period_end:
target = TriangleBase._period_end(target)
target.name = 'valuation'
return target

@staticmethod
Expand Down
98 changes: 88 additions & 10 deletions chainladder/development/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
================
"""
import numpy as np
import pandas as pd
import copy
import warnings
from sklearn.base import BaseEstimator
Expand Down Expand Up @@ -52,6 +53,15 @@ class Development(DevelopmentBase):
'volume', 'simple', and 'regression'
sigma_interpolation : string optional (default='log-linear')
Options include 'log-linear' and 'mack'
drop : tuple or list of tuples
Drops specific origin/development combination(s)
drop_high : bool or list of bool (default=None)
Drops highest link ratio(s) from LDF calculation
drop_low : bool or list of bool (default=None)
Drops lowest link ratio(s) from LDF calculation
drop_valuation : str or list of str (default=None)
Drops specific valuation periods. str must be date convertible.
Attributes
----------
Expand All @@ -68,10 +78,15 @@ class Development(DevelopmentBase):
"""
def __init__(self, n_periods=-1, average='volume',
sigma_interpolation='log-linear'):
sigma_interpolation='log-linear', drop=None,
drop_high=None, drop_low=None, drop_valuation=None):
self.n_periods = n_periods
self.average = average
self.sigma_interpolation = sigma_interpolation
self.drop_high = drop_high
self.drop_low = drop_low
self.drop_valuation = drop_valuation
self.drop = drop

def _assign_n_periods_weight(self, X):
if type(self.n_periods) is int:
Expand Down Expand Up @@ -105,6 +120,68 @@ def _assign_n_periods_weight_int(self, X, n_periods):
np.ones((k, v, n_periods+1, d))), 2)*flip_nan
return w*X.expand_dims(X.nan_triangle())

def _drop_adjustment(self, X, link_ratio):
weight = X.nan_triangle()[:, :-1]
if self.drop_high is not None:
weight = weight*self._drop_hilo('high', X, link_ratio)
if self.drop_low is not None:
weight = weight*self._drop_hilo('low', X, link_ratio)
if self.drop is not None:
weight = weight*self._drop(X)
if self.drop_valuation is not None:
weight = weight*self._drop_valuation(X)
return weight

def _drop_hilo(self, kind, X, link_ratio):
    """Build a 0/1 mask excluding the highest or lowest link ratio per column.

    Parameters
    ----------
    kind : str
        'high' drops the column maximum (uses ``self.drop_high``);
        anything else drops the column minimum (uses ``self.drop_low``).
    X : Triangle
        Triangle being fit; only ``X.development`` is read here.
    link_ratio : np.ndarray
        Array of age-to-age link ratios.

    Returns
    -------
    np.ndarray
        0/1 mask, 0 where the extreme link ratio should be excluded.
    """
    # NOTE(review): mutates the caller's link_ratio array in place —
    # zeros become NaN for the min/max computations below.
    link_ratio[link_ratio == 0] = np.nan
    # Count of non-NaN link ratios per development column (first index slice).
    lr_valid_count = np.sum(~np.isnan(link_ratio)[0, 0], axis=0)
    if kind == 'high':
        vals = np.nanmax(link_ratio, -2, keepdims=True)
        drop_hilo = self.drop_high
    else:
        vals = np.nanmin(link_ratio, -2, keepdims=True)
        drop_hilo = self.drop_low
    # 0 where a cell equals the column extreme (ties all drop), 1 elsewhere.
    hilo = 1*(vals != link_ratio)
    if type(drop_hilo) is bool:
        # A scalar bool applies to every development period.
        drop_hilo = [drop_hilo]*(len(X.development)-1)
    for num, item in enumerate(self.average_):
        if not drop_hilo[num]:
            # Exclusion not requested for this column: force mask to all 1s.
            hilo[..., num] = hilo[..., num]*0+1
        else:
            if lr_valid_count[num] < 3:
                # Too few link ratios to drop one safely; keep everything.
                hilo[..., num] = hilo[..., num]*0+1
                warnings.warn('drop_high and drop_low cannot be computed '
                              'when less than three LDFs are present. '
                              'Ignoring exclusions in some cases.')
    return hilo

def _drop_valuation(self, X):
    """Build a 0/1 mask excluding whole valuation period(s) from the fit.

    ``self.drop_valuation`` (str or list of str, date-convertible) names
    the valuation period(s) to drop; cells of ``X`` falling in those
    periods get weight 0, everything else weight 1.

    Returns
    -------
    np.ndarray
        Mask of shape ``X.shape[-2:]`` minus the last development column.
    """
    if type(self.drop_valuation) is not list:
        drop_valuation = [self.drop_valuation]
    else:
        drop_valuation = self.drop_valuation
    # 1 everywhere except cells whose valuation matches a dropped period.
    arr = 1-np.nan_to_num(X[X.valuation.isin(
        pd.PeriodIndex(drop_valuation,
                       freq=X.origin_grain))].values[0, 0]*0+1)
    # The valuation slice may come back smaller than X; pad the missing
    # origin rows / development columns back out with 1s (i.e. "keep").
    ofill = X.shape[-2]-arr.shape[-2]
    dfill = X.shape[-1]-arr.shape[-1]
    if ofill > 0:
        arr = np.concatenate((arr, np.repeat(
            np.expand_dims(np.ones(arr.shape[-1]), 0), ofill, 0)), 0)
    if dfill > 0:
        arr = np.concatenate((arr, np.repeat(
            np.expand_dims(np.ones(arr.shape[-2]), -1), dfill, -1)), -1)
    return arr[:, :-1]

def _drop(self, X):
drop = [self.drop] if type(self.drop) is not list else self.drop
arr = X.nan_triangle()
for item in drop:
arr[np.where(X.origin == item[0])[0][0],
np.where(X.development == item[1])[0][0]] = 0
return arr[:, :-1]

def fit(self, X, y=None, sample_weight=None):
"""Fit the model with X.
Expand All @@ -129,17 +206,18 @@ def fit(self, X, y=None, sample_weight=None):
average = self.average
average = np.array(average)
self.average_ = average
weight_dict = {'regression': 2, 'volume': 1, 'simple': 0}
_x = tri_array[..., :-1]
_y = tri_array[..., 1:]
val = np.array([weight_dict.get(item.lower(), 2)
weight_dict = {'regression': 0, 'volume': 1, 'simple': 2}
x, y = tri_array[..., :-1], tri_array[..., 1:]
val = np.array([weight_dict.get(item.lower(), 1)
for item in average])
for i in [2, 1, 0]:
val = np.repeat(np.expand_dims(val, 0), tri_array.shape[i], axis=0)
val = np.nan_to_num(val * (_y * 0 + 1))
_w = self._assign_n_periods_weight(X) / (_x**(val))
self.w_ = self._assign_n_periods_weight(X)
params = WeightedRegression(axis=2, thru_orig=True).fit(_x, _y, _w)
val = np.nan_to_num(val * (y * 0 + 1))
link_ratio = np.divide(y, x, where=np.nan_to_num(x) != 0)
self.w_ = self._assign_n_periods_weight(X) * \
self._drop_adjustment(X, link_ratio)
w = self.w_ / (x**(val))
params = WeightedRegression(axis=2, thru_orig=True).fit(x, y, w)
if self.n_periods != 1:
params = params.sigma_fill(self.sigma_interpolation)
else:
Expand All @@ -148,7 +226,7 @@ def fit(self, X, y=None, sample_weight=None):
' statistics. Only LDFs have been calculated.')
params.std_err_ = np.nan_to_num(params.std_err_) + \
np.nan_to_num((1-np.nan_to_num(params.std_err_*0+1)) *
params.sigma_/np.swapaxes(np.sqrt(_x**(2-val))[..., 0:1, :], -1, -2))
params.sigma_/np.swapaxes(np.sqrt(x**(2-val))[..., 0:1, :], -1, -2))
params = np.concatenate((params.slope_,
params.sigma_,
params.std_err_), 3)
Expand Down
14 changes: 12 additions & 2 deletions chainladder/development/tests/test_development.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ def mack_p(data, average, est_sigma):
return cl.Development(average=average, sigma_interpolation=est_sigma).fit_transform(cl.load_dataset(data))


data = ['RAA', 'ABC', 'GenIns', 'M3IR5', 'MW2008', 'MW2014']
averages = [('simple', 2), ('volume', 1), ('regression', 0)]
data = ['RAA', 'GenIns', 'MW2014']
averages = [('simple', 0), ('volume', 1), ('regression', 2)]
est_sigma = [('mack', 'Mack'), ('log-linear', 'log-linear')]


Expand All @@ -36,6 +36,16 @@ def test_full_slice2():
assert cl.Development().fit_transform(cl.load_dataset('GenIns')).ldf_ == \
cl.Development(n_periods=[1000]*(cl.load_dataset('GenIns').shape[3]-1)).fit_transform(cl.load_dataset('GenIns')).ldf_

def test_drop1():
    # Dropping the 1982@12 cell must equal dropping the highest link
    # ratio in the first development column on the RAA triangle.
    raa_triangle = cl.load_dataset('raa')
    by_cell = cl.Development(drop=('1982', 12)).fit(raa_triangle)
    by_high = cl.Development(drop_high=[True] + [False] * 8).fit(raa_triangle)
    assert by_cell.ldf_.values[0, 0, 0, 0] == by_high.ldf_.values[0, 0, 0, 0]

def test_drop2():
    # Dropping the 1981 valuation period must equal dropping the lowest
    # link ratio in the first development column on the RAA triangle.
    raa_triangle = cl.load_dataset('raa')
    by_valuation = cl.Development(drop_valuation='1981').fit(raa_triangle)
    by_low = cl.Development(drop_low=[True] + [False] * 8).fit(raa_triangle)
    assert by_valuation.ldf_.values[0, 0, 0, 0] == by_low.ldf_.values[0, 0, 0, 0]

def test_n_periods():
d = cl.load_dataset('usauto')['incurred']
return np.all(np.round(np.unique(
Expand Down
2 changes: 1 addition & 1 deletion chainladder/methods/mack.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def fit(self, X, y=None, sample_weight=None):
def full_std_err_(self):
obj = copy.deepcopy(self.X_)
tri_array = self.full_triangle_.values
weight_dict = {'regression': 2, 'volume': 1, 'simple': 0}
weight_dict = {'regression': 0, 'volume': 1, 'simple': 2}
val = np.array([weight_dict.get(item.lower(), 2)
for item in list(self.average_) + ['volume']])
for i in [2, 1, 0]:
Expand Down
4 changes: 2 additions & 2 deletions chainladder/methods/tests/test_mack.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ def mack_p(data, average, est_sigma, tail):
return cl.MackChainladder().fit(cl.Development(average=average, sigma_interpolation=est_sigma).fit_transform(cl.load_dataset(data)))


data = ['RAA', 'ABC', 'GenIns', 'MW2008', 'MW2014']
data = ['ABC', 'MW2008']
tail = [True, False]
averages = [('simple', 2), ('volume', 1), ('regression', 0)]
averages = [('simple', 0), ('volume', 1), ('regression', 2)]
est_sigma = [('log-linear', 'log-linear'), ('mack', 'Mack')]


Expand Down
2 changes: 1 addition & 1 deletion chainladder/tails/tests/test_exponential.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def mack_p_no_tail(data, average, est_sigma):

data = ['RAA', 'ABC', 'GenIns', 'MW2008', 'MW2014']
# M3IR5 in R fails silently on exponential tail. Python actually computes it.
averages = [('simple', 2), ('volume', 1), ('regression', 0)]
averages = [('simple', 0), ('volume', 1), ('regression', 2)]
est_sigma = [('mack', 'Mack'), ('log-linear', 'log-linear')]


Expand Down
Binary file modified docs/auto_examples/auto_examples_python.zip
Binary file not shown.
Binary file modified docs/auto_examples/plot_benktander_codeobj.pickle
Binary file not shown.
Binary file modified docs/auto_examples/plot_bf_apriori_from_cl_codeobj.pickle
Binary file not shown.
Binary file modified docs/auto_examples/plot_bootstrap_codeobj.pickle
Binary file not shown.
Binary file modified docs/auto_examples/plot_capecod_codeobj.pickle
Binary file not shown.
Binary file modified docs/auto_examples/plot_development_periods_codeobj.pickle
Binary file not shown.
Binary file modified docs/auto_examples/plot_exhibits_codeobj.pickle
Binary file not shown.
Binary file modified docs/auto_examples/plot_mack_codeobj.pickle
Binary file not shown.
Binary file modified docs/auto_examples/plot_munich_codeobj.pickle
Binary file not shown.
Binary file modified docs/auto_examples/plot_triangle_from_pandas_codeobj.pickle
Binary file not shown.
Binary file modified docs/auto_examples/plot_triangle_slicing_codeobj.pickle
Binary file not shown.
37 changes: 35 additions & 2 deletions docs/modules/development.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,41 @@ Loss Development Patterns
Basic Development
==================

:class:`Development` allows for the selection of loss development patterns.

:class:`Development` allows for the selection of loss development patterns. Many
of the typical averaging techniques are available in this class, as well as the
ability to exclude certain patterns from the LDF calculation.

Single Development Adjustment vs Entire Triangle adjustment
-----------------------------------------------------------

Most of the arguments of the ``Development`` class can be specified for each
development period separately. When adjusting individual development periods,
a list is required that defines the argument for each development period.

**Example:**
>>> import chainladder as cl
>>> raa = cl.load_dataset('raa')
>>> cl.Development(average=['volume']+['simple']*8).fit(raa)

This approach works for ``average``, ``n_periods``, ``drop_high`` and ``drop_low``.

Omitting link ratios
--------------------
There are several arguments for dropping individual cells from the triangle as
well as excluding whole valuation periods or highs and lows. Any combination
of the 'drop' arguments is permissible.

**Example:**
>>> import chainladder as cl
>>> raa = cl.load_dataset('raa')
>>> cl.Development(drop_high=True, drop_low=True).fit(raa)
>>> cl.Development(drop_valuation='1985').fit(raa)
>>> cl.Development(drop=[('1985', 12), ('1987', 24)]).fit(raa)
>>> cl.Development(drop=('1985', 12), drop_valuation='1988').fit(raa)

.. note::
``drop_high`` and ``drop_low`` are ignored in cases where the number of link
ratios available for a given development period is less than 3.

.. _incremental:

Expand Down

0 comments on commit 7df5952

Please sign in to comment.