
Commit

resolves #180
jbogaardt committed Jul 25, 2021
1 parent d06f0ce commit 468d733
Showing 7 changed files with 278 additions and 27 deletions.
5 changes: 5 additions & 0 deletions .gitignore
@@ -117,3 +117,8 @@ settings.json
 *.DS_Store
 *Icon
 *r
+
+# asv environments
+.asv
+
+coverage_html_report
1 change: 1 addition & 0 deletions benchmarks/__init__.py
@@ -0,0 +1 @@

160 changes: 160 additions & 0 deletions benchmarks/asv.conf.json
@@ -0,0 +1,160 @@
{
// The version of the config file format. Do not change, unless
// you know what you are doing.
"version": 1,

// The name of the project being benchmarked
"project": "chainladder-python",

// The project's homepage
"project_url": "https://chainladder-python.readthedocs.io/en/latest/",

// The URL or local path of the source code repository for the
// project being benchmarked
"repo": "..",

// The Python project's subdirectory in your repo. If missing or
// the empty string, the project is assumed to be located at the root
// of the repository.
// "repo_subdir": "",

// Customizable commands for building, installing, and
// uninstalling the project. See asv.conf.json documentation.
//
// "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"],
// "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
// "build_command": [
// "python setup.py build",
// "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
// ],

// List of branches to benchmark. If not provided, defaults to "master"
// (for git) or "default" (for mercurial).
// "branches": ["master"], // for git
// "branches": ["default"], // for mercurial

// The DVCS being used. If not set, it will be automatically
// determined from "repo" by looking at the protocol in the URL
// (if remote), or by looking for special directories, such as
// ".git" (if local).
// "dvcs": "git",

// The tool to use to create environments. May be "conda",
// "virtualenv" or other value depending on the plugins in use.
// If missing or the empty string, the tool will be automatically
// determined by looking for tools on the PATH environment
// variable.
"environment_type": "conda",

// timeout in seconds for installing any dependencies in environment
// defaults to 10 min
//"install_timeout": 600,

// the base URL to show a commit for the project.
// "show_commit_url": "http://github.com/owner/project/commit/",

// The Pythons you'd like to test against. If not provided, defaults
// to the current version of Python used to run `asv`.
// "pythons": ["3.6", "3.9"],

// The list of conda channel names to be searched for benchmark
// dependency packages in the specified order
// "conda_channels": ["conda-forge", "defaults"],

// The matrix of dependencies to test. Each key is the name of a
// package (in PyPI) and the values are version numbers. An empty
// list or empty string indicates to just test against the default
// (latest) version. null indicates that the package is to not be
// installed. If the package to be tested is only available from
// PyPi, and the 'environment_type' is conda, then you can preface
// the package name by 'pip+', and the package will be installed via
// pip (with all the conda available packages installed first,
// followed by the pip installed packages).
//
// "matrix": {
// "numpy": ["1.6", "1.7"],
// "six": ["", null], // test with and without six installed
// "pip+emcee": [""], // emcee is only available for install with pip.
// },

// Combinations of libraries/python versions can be excluded/included
// from the set to test. Each entry is a dictionary containing additional
// key-value pairs to include/exclude.
//
// An exclude entry excludes entries where all values match. The
// values are regexps that should match the whole string.
//
// An include entry adds an environment. Only the packages listed
// are installed. The 'python' key is required. The exclude rules
// do not apply to includes.
//
// In addition to package names, the following keys are available:
//
// - python
// Python version, as in the *pythons* variable above.
// - environment_type
// Environment type, as above.
// - sys_platform
// Platform, as in sys.platform. Possible values for the common
// cases: 'linux2', 'win32', 'cygwin', 'darwin'.
//
// "exclude": [
// {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
// {"environment_type": "conda", "six": null}, // don't run without six on conda
// ],
//
// "include": [
// // additional env for python2.7
// {"python": "2.7", "numpy": "1.8"},
// // additional env if run on windows+conda
// {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
// ],

// The directory (relative to the current directory) that benchmarks are
// stored in. If not provided, defaults to "benchmarks"
"benchmark_dir": ".",

// The directory (relative to the current directory) to cache the Python
// environments in. If not provided, defaults to "env"
"env_dir": "../.asv/env",

// The directory (relative to the current directory) that raw benchmark
// results are stored in. If not provided, defaults to "results".
"results_dir": "../.asv/results",

// The directory (relative to the current directory) that the html tree
// should be written to. If not provided, defaults to "html".
"html_dir": "../.asv/html",

// The number of characters to retain in the commit hashes.
// "hash_length": 8,

// `asv` will cache results of the recent builds in each
// environment, making them faster to install next time. This is
// the number of builds to keep, per environment.
// "build_cache_size": 2,

// The commits after which the regression search in `asv publish`
// should start looking for regressions. Dictionary whose keys are
// regexps matching to benchmark names, and values corresponding to
// the commit (exclusive) after which to start looking for
// regressions. The default is to start from the first commit
// with results. If the commit is `null`, regression detection is
// skipped for the matching benchmark.
//
// "regressions_first_commits": {
// "some_benchmark": "352cdf", // Consider regressions only after this commit
// "another_benchmark": null, // Skip regression detection altogether
// },

// The thresholds for relative change in results, after which `asv
// publish` starts reporting regressions. Dictionary of the same
// form as in ``regressions_first_commits``, with values
// indicating the thresholds. If multiple entries match, the
// maximum is taken. If no entry matches, the default is 5%.
//
// "regressions_thresholds": {
// "some_benchmark": 0.01, // Threshold of 1%
// "another_benchmark": 0.5, // Threshold of 50%
// },
}
57 changes: 57 additions & 0 deletions benchmarks/benchmarks.py
@@ -0,0 +1,57 @@
# Write the benchmarking functions here.
# See "Writing benchmarks" in the asv docs for more information.
import chainladder as cl

class TimeSuite:
    def setup(self):
        self.prism = cl.load_sample('prism')

    def time_incr_to_cum(self):
        self.prism.incr_to_cum()

    def time_groupby(self):
        self.prism.groupby(['Line']).sum()

    def time_index_broadcasting(self):
        self.prism / self.prism.groupby(['Line']).sum()

    def time_grain(self):
        self.prism.grain('OYDY')

    def time_dev_to_val(self):
        self.prism.dev_to_val()

    def time_val_to_dev(self):
        self.prism.dev_to_val().val_to_dev()

    def time_fit_chainladder(self):
        cl.Chainladder().fit(
            cl.Development(groupby=lambda x : 1).fit_transform(self.prism['Paid'])
        ).ibnr_

class MemSuite:
    def setup(self):
        self.prism = cl.load_sample('prism')

    def peakmem_incr_to_cum(self):
        self.prism.incr_to_cum()

    def peakmem_groupby(self):
        self.prism.groupby(['Line']).sum()

    def peakmem_index_broadcasting(self):
        self.prism / self.prism.groupby(['Line']).sum()

    def peakmem_grain(self):
        self.prism.grain('OYDY')

    def peakmem_dev_to_val(self):
        self.prism.dev_to_val()

    def peakmem_val_to_dev(self):
        self.prism.dev_to_val().val_to_dev()

    def peakmem_fit_chainladder(self):
        cl.Chainladder().fit(
            cl.Development(groupby=lambda x : 1).fit_transform(self.prism['Paid'])
        ).ibnr_
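
For context on how asv consumes these classes: setup() runs before each benchmark, time_* methods are timed over their body only, and peakmem_* methods are measured for peak memory use. A rough stand-alone sanity check of one of the timed operations, outside of asv, could look like the sketch below; the repetition count of 5 is an arbitrary illustration and not part of the committed suite.

# Hypothetical local check of the time_incr_to_cum benchmark; not part of the asv suite
import timeit

import chainladder as cl

prism = cl.load_sample('prism')  # mirrors TimeSuite.setup
elapsed = timeit.timeit(lambda: prism.incr_to_cum(), number=5)
print(f"incr_to_cum: {elapsed / 5:.3f}s per call")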
26 changes: 19 additions & 7 deletions chainladder/core/base.py
@@ -115,13 +115,25 @@ def __init__(
                 ' is expressed as an age where a date-like vector is required')
 
         # Summarize dataframe to the level specified in axes
-        data["__origin__"] = origin_date
-        data["__development__"] = development_date
-        key_gr = ["__origin__", "__development__"] + [
-            data[item] for item in ([] if not index else index)
-        ]
-        data_agg = data.groupby(key_gr)[columns].sum().reset_index().fillna(0)
-        data = data.drop(['__origin__', '__development__'], axis=1)
+        if type(data) != pd.DataFrame:
+            # Dask dataframes are mutated
+            data["__origin__"] = origin_date
+            data["__development__"] = development_date
+            key_gr = ["__origin__", "__development__"] + [
+                data[item] for item in ([] if not index else index)
+            ]
+            data_agg = data.groupby(key_gr)[columns].sum().reset_index().fillna(0)
+            data = data.drop(['__origin__', '__development__'], axis=1)
+        else:
+            # Summarize dataframe to the level specified in axes
+            key_gr = [origin_date, development_date] + [
+                data[item] for item in ([] if not index else index)
+            ]
+            data_agg = data[columns].groupby(key_gr).sum().reset_index().fillna(0)
+            data_agg["__origin__"] = data_agg[origin_date.name]
+            data_agg["__development__"] = data_agg[development_date.name]
+
+
         if not index:
             index = ["Total"]
             data_agg[index[0]] = "Total"
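
The new branch keys off type(data) != pd.DataFrame: dataframe-like objects that are not plain pandas frames (the inline comment calls out Dask dataframes) keep the original mutate-then-groupby path, while plain pandas input is now aggregated without writing temporary __origin__/__development__ columns into the caller's data. As a rough sketch of the kind of input this path is meant to accept (assuming a dask DataFrame is passed straight to the Triangle constructor; the file name and column names below are made up):

# Hypothetical usage sketch; 'claims.csv' and the column names are illustrative only
import dask.dataframe as dd
import chainladder as cl

ddf = dd.read_csv('claims.csv', parse_dates=['accident_date', 'valuation_date'])
triangle = cl.Triangle(
    ddf,
    origin='accident_date',
    development='valuation_date',
    columns=['paid'],
    index=['line'],
    cumulative=False,
)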
55 changes: 35 additions & 20 deletions chainladder/core/dunders.py
@@ -8,6 +8,11 @@
 from chainladder.core.pandas import TriangleGroupBy
 from chainladder.utils.sparse import sp
 
+try:
+    import dask.bag as db
+except:
+    db = None
+
 class TriangleDunders:
     """ Class that implements the dunder (double underscore) methods for the
     Triangle class
Expand Down Expand Up @@ -200,13 +205,23 @@ def _get_key_union(obj, other):
return set(list(obj.groups.indices.keys()) +
list(other.groups.indices.keys()))

def _arithmetic_mapper(self, obj, other, f):
""" Use Dask if available, otherwise basic list comprehension """
if db and obj.obj.array_backend == 'sparse':
bag = db.from_sequence(self._get_key_union(obj, other))
bag = bag.map(f, self, obj, other)
c = bag.compute(scheduler='threads')
else:
c = [f(k, self, obj, other) for k in self._get_key_union(obj, other)]
return concat(c, 0).sort_index()

def __add__(self, other):
obj, other = self._validate_arithmetic(other)
if isinstance(obj, TriangleGroupBy):
c = [self._slice_or_nan(obj, other, k) +
self._slice_or_nan(other, obj, k)
for k in self._get_key_union(obj, other)]
obj = concat(c, 0).sort_index()
def f(k, self, obj, other):
return (self._slice_or_nan(obj, other, k) +
self._slice_or_nan(other, obj, k))
obj = self._arithmetic_mapper(obj, other, f)
else:
xp = obj.get_array_module()
obj.values = xp.nan_to_num(obj.values) + xp.nan_to_num(other)
@@ -218,10 +233,10 @@ def __radd__(self, other):
     def __sub__(self, other):
         obj, other = self._validate_arithmetic(other)
         if isinstance(obj, TriangleGroupBy):
-            c = [self._slice_or_nan(obj, other, k) -
-                 self._slice_or_nan(other, obj, k)
-                 for k in self._get_key_union(obj, other)]
-            obj = concat(c, 0).sort_index()
+            def f(k, self, obj, other):
+                return (self._slice_or_nan(obj, other, k) -
+                        self._slice_or_nan(other, obj, k))
+            obj = self._arithmetic_mapper(obj, other, f)
         else:
             xp = obj.get_array_module()
             obj.values = xp.nan_to_num(obj.values) - xp.nan_to_num(other)
@@ -252,10 +267,10 @@ def __abs__(self):
     def __mul__(self, other):
         obj, other = self._validate_arithmetic(other)
         if isinstance(obj, TriangleGroupBy):
-            c = [self._slice_or_nan(obj, other, k) *
-                 self._slice_or_nan(other, obj, k)
-                 for k in self._get_key_union(obj, other)]
-            obj = concat(c, 0).sort_index()
+            def f(k, self, obj, other):
+                return (self._slice_or_nan(obj, other, k) *
+                        self._slice_or_nan(other, obj, k))
+            obj = self._arithmetic_mapper(obj, other, f)
         else:
             xp = obj.get_array_module()
             obj.values = obj.values * other
@@ -267,10 +282,10 @@ def __rmul__(self, other):
     def __pow__(self, other):
         obj, other = self._validate_arithmetic(other)
         if isinstance(obj, TriangleGroupBy):
-            c = [self._slice_or_nan(obj, other, k) **
-                 self._slice_or_nan(other, obj, k)
-                 for k in self._get_key_union(obj, other)]
-            obj = concat(c, 0).sort_index()
+            def f(k, self, obj, other):
+                return (self._slice_or_nan(obj, other, k) **
+                        self._slice_or_nan(other, obj, k))
+            obj = self._arithmetic_mapper(obj, other, f)
         else:
             xp = obj.get_array_module()
             obj.values = xp.nan_to_num(obj.values) ** other
@@ -285,10 +300,10 @@ def __round__(self, other):
     def __truediv__(self, other):
         obj, other = self._validate_arithmetic(other)
         if isinstance(obj, TriangleGroupBy):
-            c = [self._slice_or_nan(obj, other, k) /
-                 self._slice_or_nan(other, obj, k)
-                 for k in self._get_key_union(obj, other)]
-            obj = concat(c, 0).sort_index()
+            def f(k, self, obj, other):
+                return (self._slice_or_nan(obj, other, k) /
+                        self._slice_or_nan(other, obj, k))
+            obj = self._arithmetic_mapper(obj, other, f)
         else:
             xp = obj.get_array_module()
             obj.values = obj.values / other
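
The new _arithmetic_mapper helper consolidates the per-group arithmetic that was previously written out as a list comprehension in each of __add__, __sub__, __mul__, __pow__, and __truediv__. When dask.bag imports successfully and the backend is sparse, the per-key function is mapped over a bag and evaluated on the threaded scheduler; otherwise it falls back to the plain list comprehension. The underlying dask pattern, shown here on a toy function rather than triangle slices, is roughly:

# Toy illustration of the dask.bag map/compute pattern used by _arithmetic_mapper;
# square() and the input list are made up for demonstration
import dask.bag as db

def square(x):
    return x * x

bag = db.from_sequence([1, 2, 3, 4])
results = bag.map(square).compute(scheduler='threads')  # [1, 4, 9, 16]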
1 change: 1 addition & 0 deletions environment-dev.yaml
@@ -39,3 +39,4 @@ dependencies:
 
   - pip:
     - sphinx_gallery
+    - asv