diff --git a/chainladder/core/base.py b/chainladder/core/base.py
index e39f97cf..89baab17 100644
--- a/chainladder/core/base.py
+++ b/chainladder/core/base.py
@@ -103,12 +103,15 @@ def columns(self, value):
 
     @property
     def origin(self):
-        return pd.DatetimeIndex(self.odims, name='origin')
+        return pd.DatetimeIndex(self.odims, name='origin') \
+                 .to_period(self.origin_grain)
 
     @origin.setter
     def origin(self, value):
         self._len_check(self.origin, value)
-        self.odims = pd.Series([value] if type(value) is str else value).values
+        value = pd.PeriodIndex([item for item in list(value)],
+                               freq=self.origin_grain).to_timestamp()
+        self.odims = value.values
 
     @property
     def development(self):
@@ -128,7 +131,7 @@ def link_ratio(self):
         obj = copy.deepcopy(self)
         temp = obj.values.copy()
         temp[temp == 0] = np.nan
-        val_array = obj.valuation.values.reshape(obj.shape[-2:],
+        val_array = obj.valuation.to_timestamp().values.reshape(obj.shape[-2:],
                                                  order='f')[:, 1:]
         obj.values = temp[..., 1:]/temp[..., :-1]
         obj.ddims = np.array(['{}-{}'.format(obj.ddims[i], obj.ddims[i+1])
@@ -139,7 +142,7 @@ def link_ratio(self):
             obj.odims = obj.odims[:-1]
             val_array = val_array[:-1, :]
         obj.valuation = pd.DatetimeIndex(
-            pd.DataFrame(val_array).unstack().values)
+            pd.DataFrame(val_array).unstack().values).to_period(self._lowest_grain())
         return obj
 
     @property
@@ -159,7 +162,7 @@ def get_latest_diagonal(self, compress=True):
             diagonal = np.expand_dims(np.nansum(diagonal, 3), 3)
             obj.ddims = ['Latest']
             obj.valuation = pd.DatetimeIndex(
-                [pd.to_datetime(obj.valuation_date)]*len(obj.odims))
+                [pd.to_datetime(obj.valuation_date)]*len(obj.odims)).to_period(self._lowest_grain())
         obj.values = diagonal
         return obj
 
@@ -252,7 +255,9 @@ def grain(self, grain='', incremental=False, inplace=False):
             self.values = self._slide(new_tri, direction='l')
             self.values[self.values == 0] = np.nan
             self.valuation = self._valuation_triangle()
-            del self._nan_triangle
+            if hasattr(self, '_nan_triangle'):
+                # Force update on _nan_triangle at next access.
+                del self._nan_triangle
             if incremental:
                 self.cum_to_incr(inplace=True)
             return self
@@ -283,7 +288,7 @@ def trend(self, trend=0.0):
                 .value/365.25)
         else:
             trend = (1 + trend)**-(
-                pd.Series(self.valuation.values -
+                pd.Series(self.valuation.to_timestamp().values -
                           np.datetime64(self.valuation_date)).dt.days
                 .values.reshape(self.shape[-2:], order='f')/365.25)
         obj = copy.deepcopy(self)
@@ -433,6 +438,8 @@ def _validate_arithmetic(self, other):
             ddims = set(self.ddims).intersection(set(other.ddims))
             odims = set(self.odims).intersection(set(other.odims))
             # Need to set string vs int type-casting
+            odims = pd.PeriodIndex(np.array(list(odims)),
+                                   freq=self.origin_grain)
             obj = obj[obj.origin.isin(odims)][obj.development.isin(ddims)]
             other = other[other.origin.isin(odims)][other.development.isin(ddims)]
             obj.odims = np.sort(np.array(list(odims)))
@@ -712,7 +719,7 @@ def nan_triangle(self):
            hasattr(self, '_nan_triangle'):
             self.valuation = self._valuation_triangle()
             val_array = self.valuation
-            val_array = val_array.values.reshape(self.shape[-2:], order='f')
+            val_array = val_array.to_timestamp().values.reshape(self.shape[-2:], order='f')
             nan_triangle = np.array(
                 pd.DataFrame(val_array) > self.valuation_date)
             nan_triangle = np.where(nan_triangle, np.nan, 1)
@@ -736,6 +743,8 @@ def _valuation_triangle(self, ddims=None):
         origin = pd.PeriodIndex(self.odims, freq=self.origin_grain) \
                    .to_timestamp(how='s')
         origin = pd.Series(origin)
+        if type(self.valuation_date) is not pd.Timestamp:
+            self.valuation_date = self.valuation_date.to_timestamp()
         # Limit origin to valuation date
         origin[origin > self.valuation_date] = self.valuation_date
         next_development = origin+pd.DateOffset(days=-1, months=ddims[0])
@@ -750,7 +759,16 @@ def _valuation_triangle(self, ddims=None):
             next_development = np.expand_dims(
                 np.array(origin+pd.DateOffset(days=-1, months=item)), -1)
             val_array = np.concatenate((val_array, next_development), -1)
-        return pd.DatetimeIndex(pd.DataFrame(val_array).unstack().values)
+        val_array = pd.DatetimeIndex(pd.DataFrame(val_array).unstack().values)
+        return val_array.to_period(self._lowest_grain())
+
+    def _lowest_grain(self):
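+        # Return the finer of the origin and development grains, e.g.
+        # origin_grain='Y' with development_grain='Q' resolves to 'Q'.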
+        my_list = ['M', 'Q', 'Y']
+        my_dict = {item: num for num, item in enumerate(my_list)}
+        lowest_grain = my_list[min(my_dict[self.origin_grain],
+                                   my_dict[self.development_grain])]
+        return lowest_grain
+
     def _slide(self, triangle, direction='r'):
         ''' Facilitates swapping alignment of triangle between development
@@ -803,6 +821,7 @@ def to_datetime(data, fields, period_end=False):
         target = target_field.map(arr)
         if period_end:
             target = TriangleBase._period_end(target)
+        target.name = 'valuation'
         return target
 
     @staticmethod
diff --git a/chainladder/development/base.py b/chainladder/development/base.py
index 005d28ad..77ab63ab 100644
--- a/chainladder/development/base.py
+++ b/chainladder/development/base.py
@@ -3,6 +3,7 @@
 ================
 """
 import numpy as np
+import pandas as pd
 import copy
 import warnings
 from sklearn.base import BaseEstimator
@@ -52,6 +53,15 @@ class Development(DevelopmentBase):
         'volume', 'simple', and 'regression'
     sigma_interpolation : string optional (default='log-linear')
         Options include 'log-linear' and 'mack'
+    drop : tuple or list of tuples
+        Drops specific origin/development combination(s)
+    drop_high : bool or list of bool (default=None)
+        Drops highest link ratio(s) from LDF calculation
+    drop_low : bool or list of bool (default=None)
+        Drops lowest link ratio(s) from LDF calculation
+    drop_valuation : str or list of str (default=None)
+        Drops specific valuation periods. str must be date convertible.
+
 
     Attributes
     ----------
@@ -68,10 +78,15 @@ class Development(DevelopmentBase):
     """
     def __init__(self, n_periods=-1, average='volume',
-                 sigma_interpolation='log-linear'):
+                 sigma_interpolation='log-linear', drop=None,
+                 drop_high=None, drop_low=None, drop_valuation=None):
         self.n_periods = n_periods
         self.average = average
         self.sigma_interpolation = sigma_interpolation
+        self.drop_high = drop_high
+        self.drop_low = drop_low
+        self.drop_valuation = drop_valuation
+        self.drop = drop
 
     def _assign_n_periods_weight(self, X):
         if type(self.n_periods) is int:
@@ -105,6 +120,68 @@ def _assign_n_periods_weight_int(self, X, n_periods):
                 np.ones((k, v, n_periods+1, d))), 2)*flip_nan
         return w*X.expand_dims(X.nan_triangle())
 
+    def _drop_adjustment(self, X, link_ratio):
+        weight = X.nan_triangle()[:, :-1]
+        if self.drop_high is not None:
+            weight = weight*self._drop_hilo('high', X, link_ratio)
+        if self.drop_low is not None:
+            weight = weight*self._drop_hilo('low', X, link_ratio)
+        if self.drop is not None:
+            weight = weight*self._drop(X)
+        if self.drop_valuation is not None:
+            weight = weight*self._drop_valuation(X)
+        return weight
+
+    def _drop_hilo(self, kind, X, link_ratio):
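+        # Build a 0/1 mask that zeroes out each development column's largest
+        # (kind='high') or smallest (kind='low') link ratio and leaves every
+        # other cell's weight untouched.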
+        link_ratio[link_ratio == 0] = np.nan
+        lr_valid_count = np.sum(~np.isnan(link_ratio)[0, 0], axis=0)
+        if kind == 'high':
+            vals = np.nanmax(link_ratio, -2, keepdims=True)
+            drop_hilo = self.drop_high
+        else:
+            vals = np.nanmin(link_ratio, -2, keepdims=True)
+            drop_hilo = self.drop_low
+        hilo = 1*(vals != link_ratio)
+        if type(drop_hilo) is bool:
+            drop_hilo = [drop_hilo]*(len(X.development)-1)
+        for num, item in enumerate(self.average_):
+            if not drop_hilo[num]:
+                hilo[..., num] = hilo[..., num]*0+1
+            else:
+                if lr_valid_count[num] < 3:
+                    hilo[..., num] = hilo[..., num]*0+1
+                    warnings.warn('drop_high and drop_low cannot be computed '
+                                  'when less than three LDFs are present. '
+                                  'Ignoring exclusions in some cases.')
+        return hilo
+
+    def _drop_valuation(self, X):
+        if type(self.drop_valuation) is not list:
+            drop_valuation = [self.drop_valuation]
+        else:
+            drop_valuation = self.drop_valuation
+        arr = 1-np.nan_to_num(X[X.valuation.isin(
+            pd.PeriodIndex(drop_valuation,
+                           freq=X.origin_grain))].values[0, 0]*0+1)
+        ofill = X.shape[-2]-arr.shape[-2]
+        dfill = X.shape[-1]-arr.shape[-1]
+        if ofill > 0:
+            arr = np.concatenate((arr, np.repeat(
+                np.expand_dims(np.ones(arr.shape[-1]), 0), ofill, 0)), 0)
+        if dfill > 0:
+            arr = np.concatenate((arr, np.repeat(
+                np.expand_dims(np.ones(arr.shape[-2]), -1), dfill, -1)), -1)
+        return arr[:, :-1]
+
+    def _drop(self, X):
+        drop = [self.drop] if type(self.drop) is not list else self.drop
+        arr = X.nan_triangle()
+        for item in drop:
+            arr[np.where(X.origin == item[0])[0][0],
+                np.where(X.development == item[1])[0][0]] = 0
+        return arr[:, :-1]
+
     def fit(self, X, y=None, sample_weight=None):
         """Fit the model with X.
 
@@ -129,17 +206,18 @@ def fit(self, X, y=None, sample_weight=None):
         average = self.average
         average = np.array(average)
         self.average_ = average
-        weight_dict = {'regression': 2, 'volume': 1, 'simple': 0}
-        _x = tri_array[..., :-1]
-        _y = tri_array[..., 1:]
-        val = np.array([weight_dict.get(item.lower(), 2)
+        weight_dict = {'regression': 0, 'volume': 1, 'simple': 2}
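+        # val is the regression-weight exponent, w ~ 1/x**val: 'regression'
+        # gives w=1, 'volume' w=1/x, and 'simple' w=1/x**2.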
+        x, y = tri_array[..., :-1], tri_array[..., 1:]
+        val = np.array([weight_dict.get(item.lower(), 1)
                         for item in average])
         for i in [2, 1, 0]:
             val = np.repeat(np.expand_dims(val, 0), tri_array.shape[i], axis=0)
-        val = np.nan_to_num(val * (_y * 0 + 1))
-        _w = self._assign_n_periods_weight(X) / (_x**(val))
-        self.w_ = self._assign_n_periods_weight(X)
-        params = WeightedRegression(axis=2, thru_orig=True).fit(_x, _y, _w)
+        val = np.nan_to_num(val * (y * 0 + 1))
+        link_ratio = np.divide(y, x, where=np.nan_to_num(x) != 0)
+        self.w_ = self._assign_n_periods_weight(X) * \
+            self._drop_adjustment(X, link_ratio)
+        w = self.w_ / (x**(val))
+        params = WeightedRegression(axis=2, thru_orig=True).fit(x, y, w)
         if self.n_periods != 1:
             params = params.sigma_fill(self.sigma_interpolation)
         else:
@@ -148,7 +226,7 @@ def fit(self, X, y=None, sample_weight=None):
                           ' statistics. Only LDFs have been calculated.')
         params.std_err_ = np.nan_to_num(params.std_err_) + \
             np.nan_to_num((1-np.nan_to_num(params.std_err_*0+1)) *
-                          params.sigma_/np.swapaxes(np.sqrt(_x**(2-val))[..., 0:1, :], -1, -2))
+                          params.sigma_/np.swapaxes(np.sqrt(x**(2-val))[..., 0:1, :], -1, -2))
         params = np.concatenate((params.slope_,
                                  params.sigma_,
                                  params.std_err_), 3)
diff --git a/chainladder/development/tests/test_development.py b/chainladder/development/tests/test_development.py
index eb35db18..7d3f1a37 100644
--- a/chainladder/development/tests/test_development.py
+++ b/chainladder/development/tests/test_development.py
@@ -22,8 +22,8 @@ def mack_p(data, average, est_sigma):
     return cl.Development(average=average,
                           sigma_interpolation=est_sigma).fit_transform(cl.load_dataset(data))
 
-data = ['RAA', 'ABC', 'GenIns', 'M3IR5', 'MW2008', 'MW2014']
-averages = [('simple', 2), ('volume', 1), ('regression', 0)]
+data = ['RAA', 'GenIns', 'MW2014']
+averages = [('simple', 0), ('volume', 1), ('regression', 2)]
 est_sigma = [('mack', 'Mack'), ('log-linear', 'log-linear')]
 
 
@@ -36,6 +36,16 @@ def test_full_slice2():
     assert cl.Development().fit_transform(cl.load_dataset('GenIns')).ldf_ == \
         cl.Development(n_periods=[1000]*(cl.load_dataset('GenIns').shape[3]-1)).fit_transform(cl.load_dataset('GenIns')).ldf_
 
+def test_drop1():
+    raa = cl.load_dataset('raa')
+    assert cl.Development(drop=('1982', 12)).fit(raa).ldf_.values[0, 0, 0, 0] == \
+        cl.Development(drop_high=[True]+[False]*8).fit(raa).ldf_.values[0, 0, 0, 0]
+
+def test_drop2():
+    raa = cl.load_dataset('raa')
+    assert cl.Development(drop_valuation='1981').fit(raa).ldf_.values[0, 0, 0, 0] == \
+        cl.Development(drop_low=[True]+[False]*8).fit(raa).ldf_.values[0, 0, 0, 0]
+
 def test_n_periods():
     d = cl.load_dataset('usauto')['incurred']
     return np.all(np.round(np.unique(
diff --git a/chainladder/methods/mack.py b/chainladder/methods/mack.py
index d153f57a..4f77dad3 100644
--- a/chainladder/methods/mack.py
+++ b/chainladder/methods/mack.py
@@ -64,7 +64,7 @@ def fit(self, X, y=None, sample_weight=None):
     def full_std_err_(self):
         obj = copy.deepcopy(self.X_)
         tri_array = self.full_triangle_.values
-        weight_dict = {'regression': 2, 'volume': 1, 'simple': 0}
+        weight_dict = {'regression': 0, 'volume': 1, 'simple': 2}
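+        # exponent convention matches Development.fit: weights scale as 1/x**val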
         val = np.array([weight_dict.get(item.lower(), 2)
                         for item in list(self.average_) + ['volume']])
         for i in [2, 1, 0]:
diff --git a/chainladder/methods/tests/test_mack.py b/chainladder/methods/tests/test_mack.py
index e0d99d1f..6527d899 100644
--- a/chainladder/methods/tests/test_mack.py
+++ b/chainladder/methods/tests/test_mack.py
@@ -27,9 +27,9 @@ def mack_p(data, average, est_sigma, tail):
     return cl.MackChainladder().fit(cl.Development(average=average,
                                                    sigma_interpolation=est_sigma).fit_transform(cl.load_dataset(data)))
 
-data = ['RAA', 'ABC', 'GenIns', 'MW2008', 'MW2014']
+data = ['ABC', 'MW2008']
 tail = [True, False]
-averages = [('simple', 2), ('volume', 1), ('regression', 0)]
+averages = [('simple', 0), ('volume', 1), ('regression', 2)]
 est_sigma = [('log-linear', 'log-linear'), ('mack', 'Mack')]
 
 
diff --git a/chainladder/tails/tests/test_exponential.py b/chainladder/tails/tests/test_exponential.py
index 03f5a867..0d442b88 100644
--- a/chainladder/tails/tests/test_exponential.py
+++ b/chainladder/tails/tests/test_exponential.py
@@ -23,7 +23,7 @@ def mack_p_no_tail(data, average, est_sigma):
 
 data = ['RAA', 'ABC', 'GenIns', 'MW2008', 'MW2014']
 # M3IR5 in R fails silently on exponential tail. Python actually computes it.
-averages = [('simple', 2), ('volume', 1), ('regression', 0)]
+averages = [('simple', 0), ('volume', 1), ('regression', 2)]
 est_sigma = [('mack', 'Mack'), ('log-linear', 'log-linear')]
 
 
diff --git a/docs/auto_examples/auto_examples_python.zip b/docs/auto_examples/auto_examples_python.zip
index 1a7c59dc..0f03950a 100644
Binary files a/docs/auto_examples/auto_examples_python.zip and b/docs/auto_examples/auto_examples_python.zip differ
diff --git a/docs/auto_examples/plot_benktander_codeobj.pickle b/docs/auto_examples/plot_benktander_codeobj.pickle
index ed4a2304..4347c15b 100644
Binary files a/docs/auto_examples/plot_benktander_codeobj.pickle and b/docs/auto_examples/plot_benktander_codeobj.pickle differ
diff --git a/docs/auto_examples/plot_bf_apriori_from_cl_codeobj.pickle b/docs/auto_examples/plot_bf_apriori_from_cl_codeobj.pickle
index 30b4b7f8..83a0e3fd 100644
Binary files a/docs/auto_examples/plot_bf_apriori_from_cl_codeobj.pickle and b/docs/auto_examples/plot_bf_apriori_from_cl_codeobj.pickle differ
diff --git a/docs/auto_examples/plot_bootstrap_codeobj.pickle b/docs/auto_examples/plot_bootstrap_codeobj.pickle
index db2816f8..c9756346 100644
Binary files a/docs/auto_examples/plot_bootstrap_codeobj.pickle and b/docs/auto_examples/plot_bootstrap_codeobj.pickle differ
diff --git a/docs/auto_examples/plot_capecod_codeobj.pickle b/docs/auto_examples/plot_capecod_codeobj.pickle
index 6fdc5d79..c3f35ff7 100644
Binary files a/docs/auto_examples/plot_capecod_codeobj.pickle and b/docs/auto_examples/plot_capecod_codeobj.pickle differ
diff --git a/docs/auto_examples/plot_development_periods_codeobj.pickle b/docs/auto_examples/plot_development_periods_codeobj.pickle
index bae947eb..b0a1e687 100644
Binary files a/docs/auto_examples/plot_development_periods_codeobj.pickle and b/docs/auto_examples/plot_development_periods_codeobj.pickle differ
diff --git a/docs/auto_examples/plot_exhibits_codeobj.pickle b/docs/auto_examples/plot_exhibits_codeobj.pickle
index 582c08d8..d7bfff3f 100644
Binary files a/docs/auto_examples/plot_exhibits_codeobj.pickle and b/docs/auto_examples/plot_exhibits_codeobj.pickle differ
diff --git a/docs/auto_examples/plot_mack_codeobj.pickle b/docs/auto_examples/plot_mack_codeobj.pickle
index 32296fc7..d4a43453 100644
Binary files a/docs/auto_examples/plot_mack_codeobj.pickle and b/docs/auto_examples/plot_mack_codeobj.pickle differ
diff --git a/docs/auto_examples/plot_munich_codeobj.pickle b/docs/auto_examples/plot_munich_codeobj.pickle
index da6119bd..2f642ef4 100644
Binary files a/docs/auto_examples/plot_munich_codeobj.pickle and b/docs/auto_examples/plot_munich_codeobj.pickle differ
diff --git a/docs/auto_examples/plot_triangle_from_pandas_codeobj.pickle b/docs/auto_examples/plot_triangle_from_pandas_codeobj.pickle
index d27b88fc..c93dfef1 100644
Binary files a/docs/auto_examples/plot_triangle_from_pandas_codeobj.pickle and b/docs/auto_examples/plot_triangle_from_pandas_codeobj.pickle differ
diff --git a/docs/auto_examples/plot_triangle_slicing_codeobj.pickle b/docs/auto_examples/plot_triangle_slicing_codeobj.pickle
index 6495be27..5c9989c4 100644
Binary files a/docs/auto_examples/plot_triangle_slicing_codeobj.pickle and b/docs/auto_examples/plot_triangle_slicing_codeobj.pickle differ
diff --git a/docs/modules/development.rst b/docs/modules/development.rst
index bb09d2c8..4211d180 100644
--- a/docs/modules/development.rst
+++ b/docs/modules/development.rst
@@ -13,8 +13,41 @@ Loss Development Patterns
 Basic Development
 ==================
-:class:`Development` allows for the selection of loss development patterns.
-
+:class:`Development` allows for the selection of loss development patterns. Many
+of the typical averaging techniques are available in this class, as well as the
+ability to exclude certain link ratios from the LDF calculation.
+
+Single Development Adjustment vs. Entire Triangle Adjustment
+------------------------------------------------------------
+
+Most of the arguments of the ``Development`` class can be specified for each
+development period separately. When adjusting individual development periods,
+a list is required that defines the argument for each development period.
+
+**Example:**
+    >>> import chainladder as cl
+    >>> raa = cl.load_dataset('raa')
+    >>> cl.Development(average=['volume']+['simple']*8).fit(raa)
+
+This approach works for ``average``, ``n_periods``, ``drop_high``, and ``drop_low``.
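+
+``n_periods`` can likewise be varied by development period. The following is a
+sketch with arbitrary values; it assumes each list entry follows the scalar
+semantics (``-1`` uses all available periods) and that the list length equals
+the number of development periods:
+
+**Example:**
+    >>> import chainladder as cl
+    >>> raa = cl.load_dataset('raa')
+    >>> cl.Development(n_periods=[3]*4+[-1]*5).fit(raa)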
+
+Omitting link ratios
+--------------------
+There are several arguments for dropping individual cells from the triangle as
+well as excluding whole valuation periods or highs and lows. Any combination
+of the 'drop' arguments is permissible.
+
+**Example:**
+    >>> import chainladder as cl
+    >>> raa = cl.load_dataset('raa')
+    >>> cl.Development(drop_high=True, drop_low=True).fit(raa)
+    >>> cl.Development(drop_valuation='1985').fit(raa)
+    >>> cl.Development(drop=[('1985', 12), ('1987', 24)]).fit(raa)
+    >>> cl.Development(drop=('1985', 12), drop_valuation='1988').fit(raa)
+
+.. note::
+    ``drop_high`` and ``drop_low`` are ignored in cases where the number of link
+    ratios available for a given development period is less than 3.
 
 .. _incremental: