diff --git a/doc/sphinx/tutorials/index.rst b/doc/sphinx/tutorials/index.rst index 9934141e16..325ec04a3b 100644 --- a/doc/sphinx/tutorials/index.rst +++ b/doc/sphinx/tutorials/index.rst @@ -8,5 +8,6 @@ Tutorials :maxdepth: 3 getting_started + publicdata_ps kdepdf_mcbg_ps trad_ps_expbg diff --git a/doc/sphinx/tutorials/publicdata_ps.ipynb b/doc/sphinx/tutorials/publicdata_ps.ipynb new file mode 100644 index 0000000000..95dd49a50d --- /dev/null +++ b/doc/sphinx/tutorials/publicdata_ps.ipynb @@ -0,0 +1,1153 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Working with the public 10-year IceCube point-source data\n", + "==" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This tutorial shows how to use the IceCube public 10-year point-source data with SkyLLH." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Disclaimer**\n", + "\n", + " The released 10-year IceCube point-source data can reproduce the published results only within a certain\n", + " amount of uncertainty due to the limited instrument response function binning provided in the data release.\n", + " The IceCube collaboration is able to reproduce the published results using detailed direct simulation\n", + " data, as done for the publication." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from matplotlib import pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Getting the datasets\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First we import the dataset definition of the public 10-year point-source data set:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from skyllh.datasets.i3.PublicData_10y_ps import create_dataset_collection" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The collection of datasets can be created using the ``create_dataset_collection`` function. This function requires the base path to the data repository. It's the path where the public point-source data is stored. The public point-source data can be downloaded from the [IceCube website](http://icecube.wisc.edu/data-releases/20210126_PS-IC40-IC86_VII.zip)." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "dsc = create_dataset_collection(base_path='/home/mwolf/projects/publicdata_ps/')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The ``dataset_names`` property provides a list of all the data sets defined in the data set collection of the public point-source data." 
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['IC40',\n",
+ " 'IC59',\n",
+ " 'IC79',\n",
+ " 'IC86_I',\n",
+ " 'IC86_II',\n",
+ " 'IC86_II-VII',\n",
+ " 'IC86_III',\n",
+ " 'IC86_IV',\n",
+ " 'IC86_V',\n",
+ " 'IC86_VI',\n",
+ " 'IC86_VII']"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dsc.dataset_names"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The individual data sets ``IC86_II``, ``IC86_III``, ``IC86_IV``, ``IC86_V``, ``IC86_VI``, and ``IC86_VII`` are also available as a single combined data set ``IC86_II-VII``, because these data sets share the same detector simulation and event selection. Hence, we can get a list of data sets via the ``get_datasets`` method of the ``dsc`` instance:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "datasets = dsc.get_datasets(['IC40', 'IC59', 'IC79', 'IC86_I', 'IC86_II-VII'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Getting the analysis\n",
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The analysis used for the published PRL results is referred to in SkyLLH as the \"*traditional point-source analysis*\" and is pre-defined:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from skyllh.analyses.i3.publicdata_ps.time_integrated_ps import create_analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Help on function create_analysis in module skyllh.analyses.i3.publicdata_ps.time_integrated_ps:\n",
+ "\n",
+ "create_analysis(datasets, source, refplflux_Phi0=1, refplflux_E0=1000.0, refplflux_gamma=2, ns_seed=10.0, gamma_seed=3, kde_smoothing=False, minimizer_impl='LBFGS', cap_ratio=False, compress_data=False, keep_data_fields=None, optimize_delta_angle=10, tl=None, ppbar=None)\n",
+ "    Creates the Analysis instance for this particular analysis.\n",
+ "    \n",
+ "    Parameters:\n",
+ "    -----------\n",
+ "    datasets : list of Dataset instances\n",
+ "        The list of Dataset instances, which should be used in the\n",
+ "        analysis.\n",
+ "    source : PointLikeSource instance\n",
+ "        The PointLikeSource instance defining the point source position.\n",
+ "    refplflux_Phi0 : float\n",
+ "        The flux normalization to use for the reference power law flux model.\n",
+ "    refplflux_E0 : float\n",
+ "        The reference energy to use for the reference power law flux model.\n",
+ "    refplflux_gamma : float\n",
+ "        The spectral index to use for the reference power law flux model.\n",
+ "    ns_seed : float\n",
+ "        Value to seed the minimizer with for the ns fit.\n",
+ "    gamma_seed : float | None\n",
+ "        Value to seed the minimizer with for the gamma fit. If set to None,\n",
+ "        the refplflux_gamma value will be set as gamma_seed.\n",
+ "    kde_smoothing : bool\n",
+ "        Apply a KDE-based smoothing to the data-driven background pdf.\n",
+ "        Default: False.\n",
+ "    minimizer_impl : str | \"LBFGS\"\n",
+ "        Minimizer implementation to be used. 
Supported options are \"LBFGS\"\n",
+ "        (L-BFG-S minimizer used from the :mod:`scipy.optimize` module), or\n",
+ "        \"minuit\" (Minuit minimizer used by the :mod:`iminuit` module).\n",
+ "        Default: \"LBFGS\".\n",
+ "    cap_ratio : bool\n",
+ "        If set to True, the energy PDF ratio will be capped to a finite value\n",
+ "        where no background energy PDF information is available. This will\n",
+ "        ensure that an energy PDF ratio is available for high energies where\n",
+ "        no background is available from the experimental data.\n",
+ "        If kde_smoothing is set to True, cap_ratio should be set to False!\n",
+ "        Default is False.\n",
+ "    compress_data : bool\n",
+ "        Flag if the data should get converted from float64 into float32.\n",
+ "    keep_data_fields : list of str | None\n",
+ "        List of additional data field names that should get kept when loading\n",
+ "        the data.\n",
+ "    optimize_delta_angle : float\n",
+ "        The delta angle in degrees for the event selection optimization methods.\n",
+ "    tl : TimeLord instance | None\n",
+ "        The TimeLord instance to use to time the creation of the analysis.\n",
+ "    ppbar : ProgressBar instance | None\n",
+ "        The instance of ProgressBar for the optional parent progress bar.\n",
+ "    \n",
+ "    Returns\n",
+ "    -------\n",
+ "    analysis : TimeIntegratedMultiDatasetSingleSourceAnalysis\n",
+ "        The Analysis instance for this analysis.\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "help(create_analysis)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As source we use TXS 0506+056."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from skyllh.physics.source import PointLikeSource"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "source = PointLikeSource(ra=np.deg2rad(77.35), dec=np.deg2rad(5.7))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[==========================================================] 100% ELT 0h:00m:14s[ ] 0% ELT 0h:00m:00s\n",
+ "[==========================================================] 100% ELT 0h:00m:13s[ ] 0% ELT 0h:00m:00s\n",
+ "[==========================================================] 100% ELT 0h:00m:13s[ ] 0% ELT 0h:00m:00s\n",
+ "[==========================================================] 100% ELT 0h:00m:13s[ ] 0% ELT 0h:00m:00s\n",
+ "[==========================================================] 100% ELT 0h:00m:12s[ ] 0% ELT 0h:00m:00s\n",
+ "[==========================================================] 100% ELT 0h:01m:36s\n",
+ "[==========================================================] 100% ELT 0h:00m:00s\n"
+ ]
+ }
+ ],
+ "source": [
+ "ana = create_analysis(datasets=datasets, source=source)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Initializing a trial\n",
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "After the `Analysis` instance has been created, trials can be run. To do so, the analysis needs to be initialized with some trial data. For instance, we could initialize the analysis with the experimental data to \"unblind\" the analysis afterwards. Technically, the `TrialDataManager` of each log-likelihood ratio function, i.e. of each dataset, is initialized with data.\n",
+ "\n",
+ "The `Analysis` class provides the method `initialize_trial` to initialize a trial with data. 
It takes a list of `DataFieldRecordArray` instances holding the events. If we want to initialize a trial with the experimental data, we can get that list from the `Analysis` instance itself:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "events_list = [ data.exp for data in ana.data_list ]\n",
+ "ana.initialize_trial(events_list)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Maximizing the log-likelihood ratio function\n",
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "After initializing a trial, we can maximize the LLH ratio function using the `maximize_llhratio` method of the `Analysis` class. This method requires a ``RandomStateService`` instance in case the minimizer does not succeed and a new set of initial values for the fit parameters needs to be generated. The method returns a 4-element tuple. The first element is the set of fit parameters used in the maximization. The second element is the value of the LLH ratio function at its maximum. The third element is the array of the fit parameter values at the maximum, and the fourth element is the status dictionary of the minimizer."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from skyllh.core.random import RandomStateService\n",
+ "rss = RandomStateService(seed=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "(fitparamset, log_lambda_max, fitparam_values, status) = ana.maximize_llhratio(rss)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "log_lambda_max = 6.572529558548655\n",
+ "fitparam_values = [14.58039149  2.1685849 ]\n",
+ "status = {'grad': array([-2.09454353e-06,  2.13693588e-04]), 'task': b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH', 'funcalls': 15, 'nit': 9, 'warnflag': 0, 'skyllh_minimizer_n_reps': 0, 'n_llhratio_func_calls': 15}\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(f'log_lambda_max = {log_lambda_max}')\n",
+ "print(f'fitparam_values = {fitparam_values}')\n",
+ "print(f'status = {status}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Calculating the test-statistic\n",
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Using the maximum of the LLH ratio function and the fit parameter values at the maximum, we can calculate the test-statistic with the `calculate_test_statistic` method of the `Analysis` class:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TS = 13.145\n"
+ ]
+ }
+ ],
+ "source": [
+ "TS = ana.calculate_test_statistic(log_lambda_max, fitparam_values)\n",
+ "print(f'TS = {TS:.3f}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Unblinding the data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "After creating the analysis instance we can unblind the data for the chosen source. To do so, we initialize the analysis with a trial of the experimental data, maximize the log-likelihood ratio function for all given experimental data events, and calculate the test-statistic value. The analysis instance provides the method ``unblind`` for exactly this. 
This method requires a ``RandomStateService`` instance in case the minimizer does not succeed and a new set of initial values for the fit parameters needs to be generated."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from skyllh.core.random import RandomStateService\n",
+ "rss = RandomStateService(seed=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Help on method unblind in module skyllh.core.analysis:\n",
+ "\n",
+ "unblind(rss) method of skyllh.core.analysis.TimeIntegratedMultiDatasetSingleSourceAnalysis instance\n",
+ "    Evaluates the unscrambled data, i.e. unblinds the data.\n",
+ "    \n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    rss : RandomStateService instance\n",
+ "        The RandomStateService instance that should be used draw random\n",
+ "        numbers from.\n",
+ "    \n",
+ "    Returns\n",
+ "    -------\n",
+ "    TS : float\n",
+ "        The test-statistic value.\n",
+ "    fitparam_dict : dict\n",
+ "        The dictionary holding the global fit parameter names and their best\n",
+ "        fit values.\n",
+ "    status : dict\n",
+ "        The status dictionary with information about the performed\n",
+ "        minimization process of the negative of the log-likelihood ratio\n",
+ "        function.\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "help(ana.unblind)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The ``unblind`` method returns the test-statistic value, the best-fit parameter values, and a status dictionary of the minimizer."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "(ts, x, status) = ana.unblind(rss=rss)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TS = 13.145\n",
+ "ns = 14.58\n",
+ "gamma = 2.17\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(f'TS = {ts:.3f}')\n",
+ "print(f'ns = {x[\"ns\"]:.2f}')\n",
+ "print(f'gamma = {x[\"gamma\"]:.2f}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Calculating the corresponding flux normalization"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "By default the analysis is created with a flux normalization of 1 GeV$^{-1}$s$^{-1}$cm$^{-2}$sr$^{-1}$ (see the `refplflux_Phi0` argument of the `create_analysis` method). The analysis instance has the method `calculate_fluxmodel_scaling_factor` that calculates the scaling factor by which the reference flux normalization has to be multiplied to represent a given analysis result, i.e. given $n_{\text{s}}$ and $\gamma$ values. 
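That is, with scaling factor $s$, the analysis result corresponds to the power-law flux $s \cdot \Phi_0 (E/E_0)^{-\gamma}$, which is exactly the flux printed further below. 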
This function takes the detected mean $n_{\text{s}}$ value as its first argument and the list of source parameter values as its second argument:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Flux scaling factor = 1.423e-15\n"
+ ]
+ }
+ ],
+ "source": [
+ "scaling_factor = ana.calculate_fluxmodel_scaling_factor(x['ns'], [x['gamma']])\n",
+ "print(f'Flux scaling factor = {scaling_factor:.3e}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Hence, our result corresponds to a power-law flux of:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1.423e-15 (E/1000 GeV)^{-2.17} 1/(GeV s cm^2 sr)\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(f'{scaling_factor:.3e} (E/1000 GeV)^{{-{x[\"gamma\"]:.2f}}} 1/(GeV s cm^2 sr)')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Evaluating the log-likelihood ratio function\n",
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Sometimes it is useful to be able to evaluate the log-likelihood ratio function, e.g. for creating a likelihood contour plot. Because SkyLLH's structure follows the mathematical structure of the likelihood function, the `Analysis` instance has the property `llhratio`, which is the class instance of the used log-likelihood ratio function. This instance has the method `evaluate`. The method takes as an argument an array of the fit parameter values at which the LLH ratio function will be evaluated. It returns the value of the LLH ratio function at the given point and its gradients w.r.t. the fit parameters.\n",
+ "\n",
+ "In our case these are the number of signal events, $n_{\mathrm{s}}$, and the spectral index $\gamma$. If we evaluate the LLH ratio function at the maximum, the gradients should be close to zero.\n",
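+ "\n",
+ "As a quick cross-check, the returned analytic gradients can be compared against simple finite differences. This is a minimal sketch, assuming the `ana` instance from above; it is not part of the analysis chain itself:\n",
+ "\n",
+ "```python\n",
+ "# Finite-difference check of the gradients returned by\n",
+ "# ana.llhratio.evaluate at the best-fit point.\n",
+ "import numpy as np\n",
+ "p = np.array([14.58, 2.17])\n",
+ "f0 = ana.llhratio.evaluate(p)[0]\n",
+ "eps = 1e-4\n",
+ "for i in range(p.size):\n",
+ "    dp = np.zeros_like(p)\n",
+ "    dp[i] = eps\n",
+ "    print((ana.llhratio.evaluate(p + dp)[0] - f0) / eps)\n",
+ "```"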
+ ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on method evaluate in module skyllh.core.llhratio:\n", + "\n", + "evaluate(fitparam_values, tl=None) method of skyllh.core.llhratio.MultiDatasetTCLLHRatio instance\n", + " Evaluates the composite log-likelihood-ratio function and returns its\n", + " value and global fit parameter gradients.\n", + " \n", + " Parameters\n", + " ----------\n", + " fitparam_values : (N_fitparams)-shaped numpy 1D ndarray\n", + " The ndarray holding the current values of the global fit parameters.\n", + " The first element of that array is, by definition, the number of\n", + " signal events, ns.\n", + " \n", + " Returns\n", + " -------\n", + " log_lambda : float\n", + " The calculated log-lambda value of the composite\n", + " log-likelihood-ratio function.\n", + " grads : (N_fitparams,)-shaped 1D ndarray\n", + " The ndarray holding the gradient value of the composite\n", + " log-likelihood-ratio function for ns and each global fit parameter.\n", + " By definition the first element is the gradient for ns.\n", + "\n" + ] + } + ], + "source": [ + "help(ana.llhratio.evaluate)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "llhratio_value = 6.573\n", + "grad_ns = 0.001\n", + "grad_gamma = -0.027\n" + ] + } + ], + "source": [ + "(llhratio_value, (grad_ns, grad_gamma)) = ana.llhratio.evaluate([14.58, 2.17])\n", + "print(f'llhratio_value = {llhratio_value:.3f}')\n", + "print(f'grad_ns = {grad_ns:.3f}')\n", + "print(f'grad_gamma = {grad_gamma:.3f}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the `evaluate` method of the `LLHRatio` class we can scan the log-likelihood ratio space and create a contour plot showing the best fit and the 95% quantile." 
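+ "\n",
+ "Note that the scan below draws the contour at the 95% quantile of the scanned $\log(\Lambda)$ values. An alternative, assuming Wilks' theorem holds for the two fit parameters, is to place the contour at a fixed offset below the maximum. A minimal sketch, to be run after the scan in the next cell:\n",
+ "\n",
+ "```python\n",
+ "# Approximate 95% confidence contour level via Wilks' theorem,\n",
+ "# using TS = 2*log(Lambda) and two degrees of freedom.\n",
+ "from scipy.stats import chi2\n",
+ "level = log_lambda.max() - 0.5*chi2.ppf(0.95, df=2)\n",
+ "# plt.contour(gamma_vals, ns_vals, log_lambda, [level])\n",
+ "```"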
+ ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [], + "source": [ + "(ns_min, ns_max, ns_step) = (0, 80, 0.5)\n", + "(gamma_min, gamma_max, gamma_step) = (1.5, 4.0, 0.1)\n", + "\n", + "ns_edges = np.linspace(ns_min, ns_max, int((ns_max-ns_min)/ns_step)+1)\n", + "ns_vals = 0.5*(ns_edges[1:] + ns_edges[:-1])\n", + "\n", + "gamma_edges = np.linspace(gamma_min, gamma_max, int((gamma_max-gamma_min)/gamma_step+1))\n", + "gamma_vals = 0.5*(gamma_edges[1:] + gamma_edges[:-1])\n", + "\n", + "log_lambda = np.empty((len(ns_vals), len(gamma_vals)), dtype=np.double)\n", + "for (ns_i, ns) in enumerate(ns_vals):\n", + " for (gamma_i, gamma) in enumerate(gamma_vals):\n", + " log_lambda[ns_i,gamma_i] = ana.llhratio.evaluate([ns, gamma])[0]\n", + "\n", + "# Determine the best fit ns and gamma values from the scan.\n", + "index_max = np.argmax(log_lambda)\n", + "ns_i_max = int(index_max / len(gamma_vals))\n", + "gamma_i_max = index_max % len(gamma_vals)\n", + "ns_best = ns_vals[ns_i_max]\n", + "gamma_best = gamma_vals[gamma_i_max]" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1.5, 4.0)" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAe8AAAF5CAYAAAC2tqKTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3deZxcZZX/8c/pLd2dlSRkJRsQCMsoakQURxFwBGVxGRBcCCMadcQFFUFEQWUJoyOooBIRDQOyyIAQB0QmijtgQH7KnhC2QEL2kK338/ujKmMISXX3uV237q36vl+venW6Uk/dp6tv16lzn+c5j7k7IiIikh91le6AiIiI9I+Ct4iISM4oeIuIiOSMgreIiEjOKHiLiIjkjIK3iIhIzqQSvM3sNDN7yMweNLNrzazZzEaa2Z1mtqj4dZc0+iIiIpJ3ZQ/eZjYR+BQw0933B+qBE4AzgQXuPh1YUPxeREREepHWZfMGoMXMGoBW4HngWGBe8f/nAe9MqS8iIiK5Vvbg7e7PAd8EngGWAevd/VfAWHdfVnzMMmBMufsiIiJSDRrKfYDiWPaxwDRgHfAzM/tAP9rPBmYDDB48+DUzZswoSz9FRKR/7rvvvlXuvutAP+/b3jLYV6/pDre/72/td7j7EQPYpcwpe/AGDgeedPeVAGZ2E/AG4AUzG+/uy8xsPLBiR43dfS4wF2DmzJm+cOHCFLosIiK9MbOny/G8q9Z0c88du4XbN45/YvQAdieT0hjzfgY4yMxazcyAw4BHgFuBWcXHzAJuSaEvIiIiuVf2zNvd7zGzG4H7gS7grxQy6SHADWZ2CoUAf1y5+yIiInngdHtPkicYbmZzgfnuPn+AOpUpaVw2x93PAc7Z7u52Clm4iIjI/3Ggh0TbVa9399kD1J1MSiV4i4iI9EcPiTLvqqfyqCIiIjmjzFtERDLFcbo90WXzqqfgLSIimZNwzLvqKXiLiEimONCt4F2SgreIiGSOMu/SNGFNRESqzXAzm2tmR1e6I+WizFtERDLFIemENa3zFhERSZtWeZem4C0iIpniuCas9ULBW0REssWhW7G7JE1YExERyRll3iIikimFjUmkFAVvERHJGKMbq3QnMk2XzUVEJFMc6PH4Da3zFhERyR2t8xYREUmbLpuXpuAtIiKZUtiYRMG7FAVvERHJnB5X8C5FwVtERDJFmXfvNNtcREQkZ5R5i4hIpjhGt3LLkhS8RUQkczTmXZqCt4iIZIrGvHun4C0iIhljdLsum5eiV0dERCRnFLxFRCRTCruK1YVvqLa5iIhI+hKOeau2uYiISJrcNebdG706IiIiOaPMW0REMqdHS8VKUvAW2YkZ516cqP2j5542QD0RqS2Fdd66MFxK2V8dM9vbzB7Y5vaimX3GzEaa2Z1mtqj4dZdy90VERPKgMOYdvdWCsv+U7v6Yux/g7gcArwE2AzcDZwIL3H06sKD4vYiI1LgBWCpW9dK+bH4Y8IS7P21mxwKHFO+fB9wFnJFyf0R2qm1iV6W7ICKyQ2kH7xOAa4v/HuvuywDcfZmZjdlRAzObDcwGmDx5ciqdlOqy/+nBseu9kh13r/Nix338bI2Vi3RrY5KSUgveZtYEHAN8sT/t3H0uMBdg5syZXoauiexQ/bCOeNtFrQPYE5Haoi1Be5dm5n0kcL+7v1D8/gUzG1/MuscDK1Lsi4iIZFhPjUw8i0ozeJ/IPy6ZA9wKzALmFL/ekmJfpIZs3LM71G5oSzzzfnFcU7itSK3TUrHepfLqmFkr8Fbgpm3ungO81cwWFf9vThp9ERERybtUMm933wyM2u6+1RRmn4uUVf2I9lC7lqbO8DE3D41n7SK1zjFNWOuFKqyJiEjm1Mp67SgFb6l6rcGx69bGePa8qVlj3iJR7iStlDbczOYC8919/gB1K1MUvEVEpNpoP2+RvBs8KJZBtzTGx7xbmzTmLRJn2lWsFwreIhnz5qO/EWr32/mnD3BPRCrDSXzZvOopeEvVG9wUm23e2hDPvKMz1e2SHVYJFqk5WuddmoK35MY+X4nVCp/wlgHuiIiUlWP0aKlYSQreUvWiGXRTXXxXsZbgMRe/uT58TBGpHQreIiKSObpsXpqCt+TG5qmxbLa5PpZBN9fHx7yjx+waqT3ERRxtTNIbBW8REckYo1tLxUpS8JbcGDQsNms8mkEPSjDm3
VQX28msYXA82xepFsq8e6dXR0REJGeUeUtuDGmNZd6DguPPjdYTagfxbL+5WZm3CKDL5r1Q8BYRkUxxN10274WCt+RGtF54dL12NGMHGNQTnOGeYA9xkWqi8qil6dURERHJGWXekhtDgzXKo7PGGyw2YzxJ2yQ7mYlUCwftKtYLBW8REckY02XzXih4S25Ea5RHM+/GBJl39JhJxtlFqkVhnbcy71IUvEVEJHNU27w0BW/Jjeja6WgGnSTzbqyLrRGP1kQXkdqi4C1SJd5ad1y47Z09PxvAnogko/28e6fgLbnRknbmnaC2eUNP7JjNDbFjvvjGlaF2IlnVo8vmJSl4S+r2uuDiULs3/csAd0REMskdupV5l6TgLbkRrZQWzaATjXkH20Z/xqe//oZQO5GsytJlczMbDHwP6ADucvdrKtwlXZcQEZHaY2ZXmtkKM3twu/uPMLPHzGyxmZ1ZvPvdwI3u/hHgmNQ7uwMK3iIikimFCWt14Vsf/QQ4Yts7zKweuAw4EtgXONHM9gV2A54tPix+SW4A6bK5pK5jt3TLnEYvYdfjoXZJjtlUF2vXMToT7yciA6bcW4K6++/MbOp2dx8ILHb3JQBmdh1wLLCUQgB/gIwkvQreIiKSKQNQYW20mS3c5vu57j63D+0m8o8MGwpB+3XAd4BLzewdwPwkHRsoCt6Supahecm8Y4VWAOot1rYxmHnbEG1oIrKNVe4+M9BuR58Y3N03Af+WsE8DSsFbREQyxvozdj2QlgKTtvl+N+D5SnSkN6kEbzMbAVwB7E/hisiHgMeA64GpwFPA8e6+No3+SGUNbYll3mln0HWW/ph3dCvRphZl3lJdKrQl6F+A6WY2DXgOOAF4XyU60pu0Ptp8G/ilu88AXgk8ApwJLHD36cCC4vciIlLjthZpid6A4WY218yO3tkxzOxa4M/A3ma21MxOcfcu4FTgDgpx6gZ3fyiNn7m/yp55m9kw4E3AyQDu3gF0mNmxwCHFh80D7gLOKHd/pPKGDmoLtWuuS7c8anTcGqAu2Da8lWijNjSR6pLwsvl6d59d6gHufuJO7r8NuC3JwdOQRua9O7AS+LGZ/dXMrihWqxnr7ssAil/H7Kixmc02s4VmtnDlStVvFhERSWPMuwF4NfBJd7/HzL5NPy6RF6f3zwWYOXNmfBBSMmNIY0eoXdoZdF0F1nmHtxJV5i1VRLuK9S6NzHspsNTd7yl+fyOFYP6CmY0HKH5dkUJfREQkB3qw8I0+jHnnXdkzb3dfbmbPmtne7v4YcBjwcPE2C5hT/HpLufsi2dDakG7mnWSDkajwDPdgu+ZGzTaX6jEARVp6HfPOu7TWeX8SuMbMmoAlFBa71wE3mNkpwDPAcSn1RUREJNdSCd7u/gCwo2o3h6VxfMmW5vrorPHYuG40m00iukY8epVgUL3GvKW6VKhIS26owpqIiGSLa8JabxS8JXUtwcw7ms3WJ6iUFtUUvEoQrW0e3Y1MJIucilVYyw1dlxARkczpKWbfkRuabS4y8KJVxKLZbJLdwaKia8Sj7ZrqlXmLbEOzzUVERNI0AEvFqp6Ct6Qu7X25o7PUuxOMKsX38w5eldCYt1QZBe/SFLwlZPqFF4fbvucdA9gRGRBHjv9EqN3tyy4b4J6IqDxqXyh4S+qiu4NF12uHZ5t7gl3Fon2NjnkHM3aAVe8cFG4rUi6abV6aZpuLiEi10WxzkR3p3K093Da+O1h0Bncwg7YkY97BvgbHyhsSjHkv+fc9w21FysJV27w3Ct4iIpIpmm3eOwVvCRk8rC3cNu1Z49GZ3wm28w6vLY+/NvHx+fZRmqku2aPgXZrGvEVERHJGmbeEDGuJZ96DgrPNo9lsuMJagg/+0bHraF+TjHkzRJm3ZIuWivVOwVtERDLHFbxLUvCWkGGD4pl3OIMOzzaPDl7Hx5GjP2Pa+4ADNDTHroSIlFPCdd7DzWwuMN/d5w9QlzJFwVtERDLFtVSsVwreEjKksSPcNpolhtdrB8Uz9rjwLPW6+GvT1BSvziYilaHgLSIimaMx79IUvCWktT79zDu6XjvartvTr7AW7WuSqxJNjZptLlmj2ea9UfAWEZHMUeZdmoK3hLTUx2cohyulpbxTFwmqltUFdySLjrMnmW3e3KAxb8kWlUftnSqsiYiI5IwybwlpSTDmnfZ67fh4cJIx73TH56PrwwGalHlL1nhhuZjsnDJvERHJnB4sfEP7eYvs2KC6eLaW9u5gedpVLHqVIPwzAo1J6qKLlIGTeMJa1RdpUeYtIiKSM8q8JSRJ5p36rPEcic4HSDLbvKlembdkjdZ590bBW0REMkcT1kpT8JaQ5uCe3JBktnme1nmn+84T3T8coElj3pJBKtJSmoJ3jZtx7sWhdh88boA7IiJS5K7g3RsFbwkZlCDzTntGdXgNdJLZ5tGZ8RWYD6DMWyR/UgneZvYUsAHoBrrcfaaZjQSuB6YCTwHHu/vaNPojIiLZpglrpaWZeb/F3Vdt8/2ZwAJ3n2NmZxa/PyPF/gjQtnt7qF2i2c3h2ubVP4MlOnadZMy7QZm3ZJAmrJVWyXXexwLziv+eB7yzgn0REZEMcbfwrRaklXk78Cszc+Byd58LjHX3ZQDuvszMxuyooZnNBmYDTJ48OaXu1o6hI7aE2iXJvKM1yqOi48hYgtrmwbShEmPeDQmydpFycGonCEelFbwPdvfniwH6TjN7tK8Ni4F+LsDMmTN1IUUkQ/b7Ymy1wkMXnjbAPRF5ieFmNheY7+7zK92ZckgleLv788WvK8zsZuBA4AUzG1/MuscDK9Loi7zU8NZY5h0dt4b0Z41Hs9I8jQRXYsz73pteET6mSG8SZmqqbZ6UmQ02s6Fb/w38C/AgcCswq/iwWcAt5e6LiIjkgGvMuzdpZN5jgZvNbOvxfuruvzSzvwA3mNkpwDOAyn6IiEiBBklLKnvwdvclwCt3cP9q4LByH19KG9qU/lKx6GXs8MSzoCTHixeiSf+1aQxect8yVu+uIpWiCmsiIpI5tXL5O0rBu8YNa4xl3tHMMknbaMYenejWneDNIzwpL/jahEvAEp/s1j0kT1P6JG9UpKU0BW8REckUR5l3bxS8a1xrQyzzbkoy5p2ToiB5KsdaiTHvuub4ckGRkhxQ8C6pkuVRRUREJECZd41rqY9t7dmYpEhLymPX0eP1kGDMOzqjPrqVaAWKtDQM0pi3lI/GvEtT8BYRkexR8C5JwbvGtQYz7+h6ZEg/gw6XR03wM0bfeKJj10k2e4n+LhsbNeYt5VI7ldKiFLxFRCR7lHmXpOBd41rqOkLtkqzzrgVJ1l2nLfq7bGzQmLdIpSh4i4hItrjWefdGwbvGNdfFxrwTbQmaeoW1UDPqKzDdNTp2nWS2efQqQVO9Mm8po2R/ftrPW0REJH2JMu+q389bwbvGRXcHSzS7WTNRBlySOQjRc6BJY94iFaPgXSVe/77/DLV719kD3BERkYGgz/glKXjXuEHBMe9KjLFGx66jklwhCI/rV6Due/SYjcHKbCJ9ouBdkoK3iIhkizYm6ZWC
d5V4/vBYFhQd76zEuHXax0xS2zxtiSreBV/XRs02lzJSbfPStKuYiIhIzijzrhKDR24OtYuu104yuzkvs82TjLFH14jH67cn+H0Ex7wb6lRlT8ooH28TFaPgLSIi2aMx75IUvKvELoO3hNqFx7wTzIoOV0oLHzGmVkZ066KZdwVmxkvtyNH2ABWh4C0iItniVP1lczMbDLS5eyhPUPCuEiMGpZx5V/tfVkLhtezR/bwrcCVEtc1F+s7M6oATgPcDrwXagUFmthK4DZjr7ov6+nyabS4iIhljhTHv6C2bfgPsAXwRGOfuk9x9DPDPwN3AHDP7QF+fTJl3lRjW1BZq1xidbZ5kXXF0l6+U/yZ7tNC0JI15S1lV35/f4e6+o5KW+wCHuvt7zKyxr0+m4C0iqTv8TeeH2v3v7740wD2RzKqy4L1t4DazA4D3AccDzwA/2/4xvVHwrhKDGzpC7eJ7a1f/Ou88SfL7iI6XR9ut/MKUUDuRPDOzvSiMeZ8IrARuBN7g7s9Hnk/BW0REsqf6PuM/CvwP8DZ3fybpkyl4V4nB9e2hdpptXh7hSmmV2FUs2NfomPfzb2oNtZMaUp0bk7yHQub9ezP7FYVL5QuiS8U021xERDLHPH7LIne/2d3fC+wL3AV8ClhqZleY2RH9fb5+Zd5m1ujunWa2B7Da3df1sV09sBB4zt2PMrORwPXAVOAp4Hh3X9uvnstLtNSnuy93otnmwXb1Ke/y1Z2jqwuJdhVL+RxoG5Wf11UqqEpPE3ffBFwDXFOMhccBXwB+2Z/n6e/76AVmNhE4D7i0H+0+DTyyzfdnUrhcMB1YUPxeRESkKpnZy7IPd1/j7pe7+6E7e8zO9HfMexhwLHAh0KfF5Ga2G/AO4Hzgs8W7jwUOKf57HoVLCGf0sy+yjea6WObdGKzgrTHv8qjMPumxzLuxLnbu9LRqfbjUpN+Y2X8Dt2w7Yc3MmoA3ArMoFHL5SV+erL/B+y5gorv/zcz6WsbtEgqXBIZuc99Yd18G4O7LzGxMP/shIiJVLKtj1wkcAXwIuNbMpgHrgGagHvgVcLG7P9DXJ+s1eJvZoRRqsa4DHqTw6WGQu/+wD22PAla4+31mdkhfO7Xdc8wGZgNMnjw58hQ1YVAw867I7Obg0HV0rLwSeV7aGXSS/dXjxwz+jM2qiS59kIPZ5ma2O/AlYLi7/2upx7p7G/A94HvFSmqjgS19nTu2vb68H14N/IJC7dXdga8AD/Xx+Q8GjjGzp4DrgEPN7GrgBTMbD1D8umJnT+Duc919prvP3HXXXft4WBERyS1PeOsDM7vSzFaY2YPb3X+EmT1mZovNrOR8LHdf4u6n9OMn29qu092XRQM39O2y+WJ3v7n475/158nd/YsUirBTzLw/7+4fMLNvULi+P6f49Zb+PK+8XHOwRnl4PXKOxrzDs9uTfPDPz8sTnjXeEBzzrmtS5i2Z8BMKE6+v2npHcWXUZcBbgaXAX8zsVgqXti/crv2H3H2niefOmNlnd3D3euC+Ab1sDvzWzE4DLnEfsJ0a5gA3mNkpFOq6HjdAz5tre3/t4nDbj+kVFJFqkizajDazhdt8P9fd577k6d1/Z2ZTt2t3IIWEdQmAmV0HHOvuFwJHJerRP8ws3uYXv38H8BfgY2b2M3f/j748SV+C937A/sAZZnYf8ADwgLv3Nwu/i8KEN9x9NXBYf9pLadEx7/j+0aFmQPrrtfMkmgUnmbsQr28fzNgblXlL7xJOWFvl7jMD7SYCz27z/VLgdTt7sJmNorCS6lVm9sVikO/NKODV7r6x+BznUKhz/ibgPmBggre7v7t4gBb+EchfRz8voYuIiPRZZYaedpRZ7LQnxUT0Y/08xmRg252kOoEp7r7FzPpc57rPS8XcfQuFKmkLe3usxHRO3xJu2xQc826qSG3zWOZd3/f6BS/RrX25yyJama2hQZm3ZNZSYNI23+8GhHb9KuGnwN1mdguFN8OjKCwfGww83NcnUW1zERHJnmSzzYeb2VwzO7qfR/0LMN3MphWLp5wA3Jr4Z9mGu38d+AiF5dfrgI+5+9fcfZO7v7+vz6NdxTJklxGbwm2ju4MlqVEelfYnxmjG3pkgYw/PCQgeMtH+6inXNm+oV4U1KW0ANhhZ7+6zSx7D7FoKlT5Hm9lS4Bx3/5GZnQrcQWGG+ZXu3tel0f3RRaEEhVO4bN5vCt4iIpI9ZS7S4u4n7uT+24DbynVcM/s0hcz7vylcNr/azOa6+3f78zwK3hkysmVzuG18X+5g1hVqlUxdcKy8J7yWXbPiS4nuZNaoMW/pi+qdqnIK8Lri7mKY2UXAn4F+BW+NeYuIiKTH4CU7QnUTmMWrzDtDRjS1hds2plxhLYnoGHQtSDJ2HRWtUR6tEdBYpzFv6V3CMe/hZjYXmO/u83t9dLp+DNxjZjdTCNrvBK7s75MoeIuISPaUecJapbj7t8zsLgp7fxgwqz9lUbdS8M6QIY3xzDu6Xjte0SvUTMokvMNXkmNGzx1l3tKb5LPNM8fMNvDSjyS2zf+5uw/rz/MpeIuIiJSZuw8dyOdT8BYRkeypssx7oCl4Z8iQ+o7eH7QT0clDUUmWUUWXfKW/dCv+7pGnLVOjRVqik+sag1uJSo2p3glrA0LBW0REMqfcFdbyTsE7QwY39HlDmZeJbkySpwwxbSqCUB6asCaSnN6fREREckaZd4Y014Xq0wPxpULRzFuf+sqjEldCovMlwhuTKPOWvtBFwZIUvEVEJFuqcJ33QFPwzpDWuvhs83iRlvAhw7Thx8CLzhivxDErsQ2t5JBOk5J09VNERLLHE9yKS8XM7OjU+50SZd4ZMijRmHd0rW60PGr6n/ui68Oj4r+N2hCdZ9FQgasEUnO0VEz6b+r3vxlqd9pbBrgjIlXmVZ+4ONz2r5edNoA9kXIyNObdGwXvDGm2eK5XiTHPqLQz6ErI03hUfcrvktHZ5k/9dM8B7olkmoJ3SQreIiKSLZpt3isF7zJoHb8x1K4xOGO80DY48zd4vFrInustwc/o1f/Ok/Zs883jQs1EqpKCt4iIZE/1f/5NRMG7DEYP3RRq1xisTw7xHZ7qqz+BzpVoVpqnGvXRWeqdQ/LzM8oA0K5iJSl4i4hI5mhXsdIUvMtgl0FbQu2iVdIg/cwrSZW0RGPJAT01MP6cRPSqTV1wzLshuJ93T3N+VlTIANCfbUl5WtEiIiIiKPMuixFNscw7yZh3fKw0O4PebZvreOJvg+loN7o66ujsMLo6jc72Oro6jM7O4teOwv9v/b/ODmNQSw/v/sQyho18+WtYCzPj8yR6rvogZd414x9lTmUnFLwlE559vJkLP7QXzz/Z3Oc2DU09NDQ6jU09bN5Qz72/GsE5P32MsZPiG7yISDZonXdpZQ/eZtYM/A4YVDzeje5+jpmNBK4HpgJPAce7+9py9ycNwxraQu2SrPPO877cv71pFN8/YyrNg7s5/QeLGDmuk8Ymp6Gxp/C1aevXQqBuaHIaGp1th84fvncI55+8F1969z6cd+OjjJvSXrkfqEbEz7lYO2t
U5l1TFLxLSiPzbgcOdfeNZtYI/MHMbgfeDSxw9zlmdiZwJnBGCv2RjGjfYvzonCn86pox7Pu6F/ncZU8wanysROy+B27k6zc8ylfeO4MvHzeD8258hLGTlYGL5JUy79LKHrzd3YGtJccaizcHjgUOKd4/D7iLKgneLfWxoNGUaJ13umd60nHkxf9vMJd8eneWLmrhPac+z/tOX0p9wrNx9/0387XrCwH8nBNncNGtDzN8VPw1rRXRWePx4wUz9gZl3iJbpXLV1MzqzewBYAVwp7vfA4x192UAxa9jdtJ2tpktNLOFK1euTKO7UkZdnca1/zmRLxy9L1s21HPOTx/lg19MHri32n3/zXz5qsdYs7yJ82ftRfuWLAwMiEi/aT/vklKZsObu3cABZjYCuNnM9u9H27nAXICZM2fm4kLKkPrYeGuSMe+6YCKc5prrh+8ZytyzpvLsY60c8u7VzP76MwwZ0U1fP0P29HE98t6v2cRnL32Ciz6yJ9/7wlQ++e0nSHlpebxyXfAMj+7nnkQ0Y4+2q1fmXTuSzzZXkZaB5O7rzOwu4AjgBTMb7+7LzGw8haxcqtD61Q1cfcEkfn39GHbdrZ2zf7yIA/9lfVmPedCRaznx88/x02/sxl4zN3DESTq9RPLCijfZuTRmm+8KdBYDdwtwOHARcCswC5hT/HpLufuSlua62KSrPNWn7kvG3r6ljvlXjOWmy8bTvqWOd3/ieY7/zPO0tsaOWdfPUZ7jP7Wcx+4byo/PncK+B25g8oz+rb/X+vDyiO4fXl+vzFtkqzQy7/HAPDOrp3B99AZ3/4WZ/Rm4wcxOAZ4BjkuhL/0y/cKLQ+0+cuwAdyRnurvhNz8bzbXf2I3Vy5t47VvXctKXnmXS9K1L6NIZh66rg89c8iSfeMt+fPe0Pbjw1odoaMzPBySRmqY/1ZLSmG3+N+BVO7h/NXBYuY9fCa11sTHvJOOW8TW3AxtIt2yq45z3zuDxvw5h+qs28tnLnmC/gzYM6DH6Y/ioLj564ZN8Y/ZezJ87jnd9YlnF+tIXubr6kvYKhzpl3rVES8VKU4U1GTDd3XDxqXuw+P8N5lMXL+Etx61KfaLYjhz09rW89l/WcuO3J/Kmd68KryUXkRQpeJek4F2C7bWx9wftQHTWeJJdxSq9IModfvzVydz7q1348Nef5tDjV1W4Ry918jlP85lDX8FPL5rEJy9ZUunu1LRohbUGjXnXFgXvkir9ni9V4uc/GMcvfjSOo05ZzlEfeqHS3XmZcVPaOWLWC/zuptEsXdz3+ukiIlmkzLuEXYfFMu/obPNo5SmIryuO7iq27Vj5gutHMe+8ybzx6DV8+NylvY6jR2dx9wQ/im+dGf+eTyzjzqvHcNN3JnLad3vPvjtr5JN/dD/vqPD68Loa+YUIuMa8e6PMWxL5w/xd+O7np3LAm9Zz2refpC7DZ9SI0V289X0r+cOtI1m9rLHS3RGRUpJVWKt6yrxLGN2yKdQuOuaddgaU1N2/HMF/njqNGTM3ctaPnqBxUPb/ao46ZTn/c+VYfnnVGN5/xnOV7k6uxSusBa+gaLZ5TUmYeQ83s7nAfHefPzA9ypYM50mSZX+6bQQXfXR39nzFZr48bzHNrfl4Yx07uYNXHbKeX9+wK93x+YEiUm7JMu/17j67WgM3KPMuaURT/ypybdVssV3FkqybjY9d97/d728exXdP253pB2zm3Ksfp3VoPgL3Voe9dyX/8dHp/P2PwzjgTS9WujvSR9HKbCLVSJm39MttV47lkk/uyb6v3ci51+QvcJlYMlEAACAASURBVAPMPHwdza3d/Pl/Rla6KyKyE+bxWy1Q5l3CsIa23h+0A9Ex7ySzzcv9KaynB679xm7c9N2JvPZtazjje0/S1JyPPcS3/3jR1Oy8+tB13HPHCD56IZmeZFeNoleYNOZdQ2po4lmU3rakVx1txiWn7slN353I4Seu4PTLF6UeuAfazMPWs25lE08/0lLprojIjmi2eUnKvEsY3BCrUd5kXaF2lahr3dvuYGuWN3LRR6bz2P1DOOlLz/Cujy/HzHK/49Yr3ljYkvRvfxjOtP1icxskXcq8Rf5BwVt26tG/DOGi2XuyZWM9X5i7iDe8Y22luzRgRk/oZOzkNh67b0iluyIpefXHY7sE3v/90wa4J9Ibo3bGrqMUvEsYWh8b847Wbk4023yAdwB5dOEQzj5uBqMndnDutY8xpZ97YZdSb7HRmm4f2Mxr+gGbeHShgnfa0l4fDrDxxgnhtlIhCt4l1UTw3uNb3wq1+9DbBrgjOfHimga++fE9GDWhg2/84iGG7lKdC6J3338zf7h1FBvX1zNkeHX+jCJ5Za7oXUpNBO+o1rrYeu3o7mB1CZLngRqD7umBSz69O+tWNXLRrQ9XbeAGmLRX4WrC0sdbmPHaWB37WpZ6TfQEqdiW0QPYESm/Gpp4FqXZ5vIS139rIvf/egQf/urT7PFPmyvdnbKaMK0wLLL8mUEV7omISP/URObdNG1DqN2g4O5gjTmabb7tDmB33zGC6y+eyGHvXcWRH1yNVflnu9ETC6sJVi5tqnBPakslapt3DQ43lQrRhLXSaiJ4S++efLiFb506jekHbOJj5z/NAM9/y6RBLU5zazcb1unPQCRzFLxLqol3rbHDYpl3s8Uy72gGnSTPjdY2B1i7ooHz/21PBg/r5qwfLWZQS+/9j84Yz5rWYd1s3lBf6W5IHySZbd7dh3NaskWZd2k1Ebxl57ZsquPrs6azfnUDF970GKPGxT6w5FVzSw/tmxW8RTJHwbukmgjeuwb35W4OjnmHx/RSvlTd1Wn858d3Z8mDrXzpysXs+YrqnqC2I43NPbS3VcdVhGqXJPPuaVR1thpT9ft510Twlpfr6YHvfW53Fi4Ywb/PeYrXvnV9pbtUEfX1Tk/1roYTyafku4Otd/fZA9SbTFLwrkHu8JNzpvD7m3blA194jiM+uKrSXaocA7wGZueJ5I0um5dUE8F7l6bY5eDokq8mYqlckklnfS3S4g7/dd5kbv/xeI6evYz3fmo5lvNNRpLwHjQzJmX1wdc7yWVzGvU7zhPVNu+dBvtqiDtcfcEkbr18PEeevJxZX36mJpaEldLVaTQ26V1CRPKlJjLvYY2xDUbiZU6zFwx6euDH507htivH8baTXuCUr9fGWu7etG+pp6lZk5mqXkP2/ialF6ptXlJNBO9a190Fl585jQXXjeGojyzj5K8o496qbVMdLUM0Y00kazKYA2VKTQTvIcGtPdMuc5qoSMtOonFHm/GtU/fg7ttH8t7TnuOEzz2HKXIDhQ/2m16sZ/AwBe88SLIxCfW6upIr2pikV7kK3n9fvZypV83pd7uTX12GzuSAO3z/jGnc88td+PDXn+aoD71Q6S5lyoa1DXR31TF8dG0VphHJg+C27zUjV8E7Krq1Z2N41ni0SMvAZsQLrh/Nb24czXtPe26ngTta5rQuwXWCnpS3ktyZVc8XNiQZPSF2fki6kswlsXqlcVJdaiJ416KnHmlh7pem8k8Hr+f4056rdHcyaf
nTha1Ax01pr3BPRORl9HmrpLIHbzObBFwFjAN6gLnu/m0zGwlcD0wFngKOd/e1pZ6robGbMWNe7HcfWutib87x2eahZolsmwlvXF/PnA9PZ/CwLj5/2ZM01mtF4I48v6QZgHFTYnMiyqG7htfcl1OdMu/c0YS10tJ4V+8CPufu+wAHAZ8ws32BM4EF7j4dWFD8XhLq6YGLPzWNlUubOGPuEnbZNTbprhY8+3gLoye00zo0G5fxRaTIKUzaid5qQNkzb3dfBiwr/nuDmT0CTASOBQ4pPmwecBdwRqnnaqzrZuzg/m/vGd1gJP3Z5sk/S13zHxP5y/+O4GPnP82+r92Y+PnKIfpzdg/wWPlTj7QyecaWAX1O6V1d8PeYaMy7Th/Q8kaZd2mpXk81s6nAq4B7gLHFwL41wI/ZSZvZZrbQzBZ2rNMbbSm/vXkkP/vueN72/pUcOWtlpbuTae1bjGcfb2GP/WM7zomIVFJqE9bMbAjw38Bn3P3Fvq41dve5wFyA0fuM9pGD+l+nfJDFMu+012snqW3+8L1D+PZnp7LfQRuYfZ6KsPRmyYOD6ek29nhleYJ3t7KGTKmr0y8kd/QrKymVzNvMGikE7mvc/abi3S+Y2fji/48HVqTRl2r0/JJmzv+3PRk7qZ2zrlisWt198MhfhgAwY2Y2hxZEatnWjUmit1qQxmxzA34EPOLu39rmv24FZgFzil9v6e256q2HEY39z7zjY97Bdd4pZr1rljdy3vv2oa7eOfeqxQzfpQc0Y7lXD989jAm7b2HE6J3Pa+jRR/9MSTbmrd9lrtTQxLOoNC6bHwx8EPi7mT1QvO8sCkH7BjM7BXgGOC6FvlSVjevqOe/9+7BhbQNzbnyc8VNVbKQvujqNB+8eyiHvruF9zKXP3jbz3FC7OxbG2on0RRqzzf/AzlPBw/rzXPXmDGvo/5rc6HrtcKW0YObb1z25ATZvqOeCD8xg2ZPNfOmqx9jzFbE9ywdihnvePHb/YNo21fOKf+5/zQDJp2jWPvl8ZX+VUiuXv6NUYS2H2jbXccGsvVjyYCufv3wxr3jji+hX2Xf3LRhBfUMPr1TwFskuBe+ScvWOX2c9DA3sENZsscvJWdyXe8vGOs6ftTePLRzKZy5bzIFvK1mUToDu7cbO7r1zBDNeu5HmoV1VMyu8pwauoCQa8w62Xfma4eFjSjIZfPvNlOr/i68im16s5+vvn1EI3Jcu5uCj11S6S7mzdFEzSx9v5aAj9dqJZJYDPR6/1YB8Zd54qE55dNZ4U7QSVKhV6V3FXlzTwLnv25unH2nh9B8s5vVvX8u2Uwmiu4PVmj/9YhQAr8to8O7x9FcKdNfAZ/ho3YP2EQPbD5GBkqvgXas62oyvvn9vli5q4Ys/WsTMw9dXuku55A6/v3kU+x30IqPGaw9vkUzLQQJtZu8E3kGhQuhl7v6rtI6dq+Bdh4fWbIcrpQU/rQ/kvtzucPlZU3nib4M568ePV0XgrtR+3ov+Opjnl7RwzEeXVeT4UjnR8fKOwQPcEemzco95m9mVwFHACnfff5v7jwC+DdQDV7j7nJ09h7v/HPi5me0CfBNQ8JaCX141hgXX78rxn3mOA/9lXaW7k2sLrhvDoJZuDj4mm5fMRWQb5S/S8hPgUgpbVgNgZvXAZcBbgaXAX8zsVgqB/MLt2n/I3bdWBj272C41uQreZh6qU56X3cG2b/fgn4dwxVcmM/PQdbzvc8tqck32QNm8oZ4/3DKKNxy9htahsTkQ/VELe1jlaaw8Otu8p2mAOyJ9Vu7M291/V9wsa1sHAovdfQmAmV0HHOvuF1LI0l/ax0IF0TnA7e5+f3l7/FK5Ct61ZPnTTVz4kT0YP7Wdz132JHX5eZ/MpLtuHE3bpnqOOOmFSndFRMpvtJkt3Ob7ucVNrnozEXh2m++XAq8r8fhPAocDw81sT3f/Qf+7GpOr4B0d8260dGeNJ7VxfT1fnzUdd+PsHy9m8LDyZ4rVrKcHbv/xWPZ85Ub2PKDvu4htvz48y7pzVM++EmvS45l3fs6BquIknbC2yt1nBtrt6A9ppz1x9+8A3wkcJzHlcxnT2WFc+OE9WPbUIM6c+wQTdu//0jh5qYV37sLzS1o4erYmqonkQWFXMQ/fElgKTNrm+92A55M8YbnkL/MOjHnXB0cgo7uDRWub9/TAdz83lb//aRifueRJXnHwhlgHqlRkly93+Pn3xrPrbu28/h3Zn6hWiQy6x6v/M3x0AUhPozLviqnMxJG/ANPNbBrwHHAC8L6K9KQX1f9XmyPzLtiNu24axQe+8ByHHre60t2pCn/7w1Aeu28o7/z356nP1UdVEUlguJnNNbOjd/YAM7sW+DOwt5ktNbNT3L0LOBW4A3gEuMHdH0qny/2Tq7czw2kKzByPjnlHM+iIW743npu/P453nLyC935qOdbPYw/ETPRvXLaGmQc085aDW3f6mN/8cTMLH2jj9E+MTHy8cnOHa781gV3GdvCW965IdX/uPI1B14LoOu8auCiRWQkvf69399mlHuDuJ+7k/tuA25IcPA06NTPgjnljufqCKbzp2DXM/tqz4Ut8Sc08oJkTZi/nN3/c8faiv/njZk6YvZyZBzSn3LOY+349nIfvHcp7Pv0cjYN0+VMkNzzhrQbkK/M2D9Upj67XjurPvtwLrtuVK740jZlvXctnv/0k9fVl7Fgv3nJwK9fNHccJs5dz3dxxL8nAtwbu7e/Pqu5uuOrCiYyb0sahJ6zovUHOdVcgRayFsXLqayQSZI6nUaQl12rgry+7fn39aL5/+jQOePM6Pvf9RTQ0VrpHLw3gWzPwvAVugAXXj+apR1o56cznaNRyH5HcMY/fakG+Mm+gkfQy73J+srnzml25/MxpvPJN6znjR4/T1JydM27bAP6xWcP5wbz1uQrcG9fX818XTWSf127g4KPXEtvNvTKTXStRtSztY3ZXYOe0MGXeeTXczOYC8919fqU7Uw65Ct7V4ta545j3tSm8+tC1fP7yRZkK3Fu95eBWPjZrOOddvIazTxuZm8ANcM03JrBhTQOzry7OH8jeyysivSnzhLW8y1XwLsw273/mnfbuYKXaXX/xBK795m684R1rOO3SJ2hsgq1FfbK0J/dv/riZH8xbz9mnjeQH89ZzyMEtuQjgi/5fK7fPG8ORs1awxz/teOJdGnpq4ANDnmbURyuseV0N/CKzyCG4SKhm5Cp4592dP92Va7+5G4e8ZxWf/NaSzK473n6M+5CDW3Ix5t3ZYXznc1MZMaaT95+eyaJIUkP++Z3fCLf9/c9PH8Ce5JQmrJWU0fCxc5H1mmnvDrYjD909lMvPmsIBb16fq8ANpWehp6m3fcB/dukEnn6klbN+/Ditwzv/79HdNXDdvCdBFpyrMeiURS+GTbpFr6mUV3au01axZU8NYs4p0xk3pZ3Pf39xrgL3VjuahZ4lix4YzA2XTORN71qlfc9FqkGydd69VljLu4yGkR2rw2kKzDZP+xPKthn7xnX1nD9rLwDO/slihg33CvSobxY+0FYys94awBc+0JYo+
+72gR3M2rKxjos/uTsjx3Yw+/ynB+x5k2Xswfr2OcqCo+u88/QzEhzz3jQuA+s+c67cFdbyLlfBO2862ws7hC1/ZhBfu/ZxJkzL9g5hfSl5+paDWzM17t3ZYVw0ezrLn27mq9c9ypDh2j5VpCpozLuk3AXv0Jh3yruDQXGHsM9P5e9/HsbnLl3C/gdtDD+X7FhPD3z3s9N44LfDOfU/l/BPb8jOLmxpz8ROsla7EvtrR1QkYw8esnPIwHaj5jiV2lUsN/LxV5tDV1048f92CHvzu7K/FWXeuMPcs6bwu5tH84Ezn+XwE1ZVuksiIqnJVeZtQFPg41iau4MB3HrFGG763niOPGkFx31qWarHTktvM7/Lobt4Gc0d/uv8Sfzyv8byzn9/nned+jzdJS7IRHcTK/Wc5ZKntdPRvuarJnrw3MnH3j2ZZXjSMe+ql6e/olz4/U2juOKcyRx05Fpmn/dMxXYIq1bucPUFk7jlBxM4YtZyPvDFZyvdJREpB/f4TbPNs8WIVUuLfkLpz+5gAPctGM5ln92DV7zhRb5w6ZM0Rgfbq1w0E94auH/+/Qm87aQX+PB5T2f2w1HqY94Jstm0dySLZ+z5GfPubhrYbtQkzTYvKVfBO8se+vNQvjl7L6bss4Wzr3wik/XK86ynB3705Snc/pNxxcD9VGYDt4gkpAlrvcpZ8PZQtbRy1wxf9NfBXHDy3oyZ0saXr3mU1qFlPVzN6e6CS78wlQXXj+aYjy7jpLP7NxzRHfwEX5FdxaJrpxPNNq+Fset0eX2leyDVrux/fWZ2pZmtMLMHt7lvpJndaWaLil93KXc/yuXJh1r5+gdmMHx0J1+55lGGjeyqdJeqSvuWOi44ZU8WXD+a409b2u/ALSL5ZO7hWy1II/P+CXApcNU2950JLHD3OWZ2ZvH7M3p7IiPdGXa97Sr2zGMtfO3EGbQM7uZr1z/KmAldhAfJ5GXWr27g/H/bk8fuH8zHL3yaQz+4bJvqh9kWHZ/Ny5prSHCVIPraJBjz9mDb6G5kPTm7pplJNRKEo8r+TuHuvwO2X+h8LDCv+O95wDvL3Y+B9uyiZr58/AzqG52vXf8oYyd1VLpLVWXp4mZOP3oGSx5q5YzLn+DIk1ZWuksikpoEM81rJOhX6vPhWHdfBuDuy8xszM4eaGazgdkAEyZmIyt54ZkmvnL8DMyc8254lAm7Z7vsad488LuhXPSxPWhodM674TFmvGZTpbskkqrD33xBqN3//vasAe5Jbg03s7nAfHefX+nOlEPmL+64+1xgLsArXtnokdVX/V3yVUpnh/HNj+9Je1sdF93yMBP3bBuw566EaLGVJJuL7OyY7nDL5eO46vxJ7DZ9C1+a9zhjJ3UknjgWbZ9kuVf6S8WSbAkanSSX7kS3PG1o4vXx7G/a/Hy/pwwIR0vFelGp4P2CmY0vZt3jgRUV6ke//dcFk1j0wBDO+OEiJu2lP7KBsvK5Jn549hTu/dUuvP7ta/jUJUtoGay1IiI1S3/+JVUqeN8KzALmFL/e0pdGhqVe6nRb9/9mOLf+cBxvP/kFXv/2tRXrRzXp7DDm/3Ac1188AXf4t688wzGzl+9wRnmyLTrzIZqxJ9mYJNo29Yw9yUZBaWftCUb4Nk5SbVVIvCVo1St78Daza4FDgNFmthQ4h0LQvsHMTgGeAY4rdz+SWr+6ge+cNo3JMzYz6+xnKt2dqvC3Pwxl7tlTWbqohdcdsYZTvvoMY3bTxD8RoWYmnkWVPXi7+4k7+a/Dyn3sraLZet02H59/+KWpbFzfwNeuXURLi5G1JWFpbxQSLXEKsOyZRn7ytSncc/tIxkxu44s/eYyZh68DyrMZSDRjT5KtpZ7NJiiYkvbSrSTj83mRpH5Nx9Dqf30kucxPWMuCP/3PCP4wfyQfOGMpU/fZUunu5NaWTXXc9L1x3Pz9cVidc+IXnuWY2ctUSlZEXsqBHr0vlJK74J32YrG2zXX88CuTmbbfZt798RdSPnr5JZk13udjdMH/Xj+aa74xkXUrG3njsas46UvPMmpC/y6Rp721Z5IZ4/EiLTka8w62i14l6OqJ/4ypX4FNkDx3tQxcN/KrdtZrR+UueKftxkvHsXp5E6f/YAkNjTqZ+sMdFi4YzrwLduOZx1rY57UbOOtHi9nj1S9WumsiknUK3iXlKngbvZcs3ZHoOu+N6+q59Ydj+edj1rDvazeGnqNa9TbG/uCfh3L1nN14dOFQxk1t4wtzF/H6t6/FDDqDf5TRDUaiKrHOOz5WnuQqQT7KnFaiPGpYsKwqQE+DxryBpMFbRVpq2e3/tSttm+v511OXVborudDRZtx7xy7ccfUY/v6nYYwc18HH5zzJYSes0lULkRQcduiFoXYLfv3FAe5JxalISy377c2j2P/1G5i2X/YnqaUxdr0j7vDE31tZcN2u/P6WUWxc18CuE9s5+cvPcOSsFxjUUvmgHX1lEs02D2azldgStDO4f2X0mPHKbOln3uENTUKtCqKbmoy/u4qKRmnCWq8UvHdi7QuNPPNYCyef/Wylu5JJ61Y28LubR7Hghl15+pFWGgf1cNCRaznsvSv5p4NfpF77GYtImEOFEpK8yF3wHsg65aU8u6gw5XPPV2xO5XhQmey5PzO4O9qMe381gl/fOIr77xpOT7ex5ys3MvvCJ3njMasZPLz7/x5baoZ3eN11qFWS8ecE48jhrDTdjB3iP2dncPZ3V0/sk12Scetw1h5M/pIMsQcvhLB5bFP8oFmkCWsl5S54p2XrGG1Pdy8PrHI9PfDwvUO466ZR/HH+Lmx6sYFR4zo49mPLePN7VjFpr+wPKYiIVBsF750YsWthDfLjDwzmgDdtqHBvehddA72jWeMb19fz8D1D+dsfhvGn/xnJmuVNNLd28/q3r+WQf13F/m94kZ66nmL79PqadqW0ZGun060+Fh23BugMDrKG12un3A6gpyeaCqc/8zv6Y3a2VtEsdY1590rBeycm7N7OzEPXccvl43jHySsZPKx6U/D1qxt4+J6hPHR34fbUw624G42Denj1W9bxz8esYeZb19Hc+o9Qrb8rkerx1jecV+kuvJwum5ek4F3CiZ9/ntOP2ocz3703Z13xBOOntvepXZK632lY+VwTD90zpBCs7xnC0sWF8f2m5m72fs1GTvjcc+x30Ab2etXGspQuja7XTrtSWiVmm3d67E8yyVWC6OzvzpQrpXUnqrCW7ph3gmXe4TKS3YNiP+PoBzbFDlhuCt4lKXiXMP2Vmznn6kV8899357Nv34dPfuMpDjpyHXVp12hNoLsLnnuimUcWFoL1w/cOYcXSQQAMHtbFjJkbOfT4Vex74Ab2eOUmGpv0ByMilabyqL1R8C6hhx5e+eZ1fPP2B5nz4enMmb0nE6a1ccRJL3Do8asYMmLHl9LrUq7A3tlhLH+2kWVPDmLZU80se6qZ5U8NYtmTzaxY2kR3V6E/w0d3st/rNnDMR5ez74EbmLLPZqh76R9IX7PbJHtrh9ddB9tVYrZ5J7Ex6I7g2HWyMe9Y267gMSuReYevokTHyiuw
c1p0fXjbWO0fnkcK3n0wdnIH/zH/Yf5020h+OW8MV351CvPOn8T4qe1M2L2NCbu3MXGPNkZPbGf4qC52Gd3N8FFdA1JVzB062422zXWsW9nIsqcLQXnZU4OKt2ZWLm16yYScliHdjJ/Wxu7/tImDj17DxD22sNerNzFh9za2ry5bji04RUQScQpLXWSnFLz7qHGQ8+Z3rebN71rNkw+18Mf5o1i6uJnnlzTz198Op7P95VnBkBFdDB/VyfDRnQwf1cXw0Z00D+6hsbGHts31tG2po31zHW2b64tf62jfwf07mik7eHgXE6a1sfdrNnDIe9oZM6WN8dPaGDe1jWEju14WpKGYufr290VnqcejfjRr7wzPxE53/DlJ2+g670SZd7Bte3fsZ4zOGk9Uv707WmEteMAKfCiOngKZnaWu2uYlKXgHTNtvC9P2W/p/33d3w6rnmli9vIn1qxpZv6qB9asbC7dVDaxf1cjSxc08dPdQ2jbX09luDGrtobm1u/i1h+aWHga1FjL25sHF+1te+phhozoZN6Wd8VPbGLrLSy/Zp71ph4hIWSV7T1Nt82oQzhL72qwORk9qZ/Skwmz0Ujuf9fSAGTvMjPsTgLe/3J322unozmCFtrF24UpgwZQkOv4M6a/Xbu9pDLUrtE03g+7oDo6VdyeYbR4cL7dgxm6VuOIbTKC7m7KYebvWo/aiJoJ3luRpprqIiGSTgncJZc/YB+p4JMmgYylCktnm8Qw6OFYaPV5wxjhAm8fqTLcFM+gkY97RzLsj2C48uz2YsUN8zDs82zxJ5p3u20429w93cG1MUpKCt4iIZI8um5eUu+Cd9eplkCwrjYpm0NHPttFx60Lb2Cf9jmAm3ObRbDbJbPN0+xrN2CGeebd1BTP2YLtEY97Rtl3BMe9opk98vDx8xAwm3oCKtPRCI7AiIiI5k6vM2wkuicrqJ8vtJFnuFc2gO4LHjGbPhbaxz4xt0THWaBacIJvd1DMo1C46a3xLd7yvbcG2bcF13uHZ5p3xMW8PZtD1wf2I6hLsY2TBthWZ4V4u7irS0otcBW8REakRumxeUu6Cd2j/6JycBEnGyqNj0NEMui3B7Oa24FhydDw4mgVH2wFs7onNNt/QHaszvak7QV+7Yn3d3Blr194RrD7XGR/ls47gOu/O4Jh3V6hZorbxjD2b74+uzLuk3AVvERGpdtpVrDe5Ct7uHh6jTVPau19B+nW/o9kzwKbgGujNnm4GHc2eATZGM+iuWF83dMZ3hnqxI/j6dMSuhHREM++O+NWe+o7gmHd7sF1HqFmhbWesXV20XYKrBFI5uQreIiJSAxyt8+5FroJ3D0Z7ivvkpr0PdHjPYRKsgY6u8Q2OP0M8g34xmM2u724NtVvTNSTUDmBtZ/CY0XbtLaF2AC+2x17XLW2xKxPdW2LnXN3meOZdtyV2ham+LXa8+i2xdgANwbYNbbFgV9+e0SCpCmsl5Sp4i4hI9XPAlXmXVNHgbWZHAN8G6oEr3H1Oqcd3Y6xLMAO4v8KZd3T/6AS1tOMVvWLZU5KZ2NEZ1dEMen1XLCtd3Tk41A5gdXssa1/VFsy8N8faAWzcFPt9dG2KXX2p3xg7Vxs2xmebN2yMtWvcHDzepli7wjFjGWfDlmDm3ZbBDNc9aeat/bzLxczqgcuAtwJLgb+Y2a3u/nCl+iQiIlVB+3mX0YHAYndfAmBm1wHHAjsN3m3eyKMd41PqXoIMOuW61hCvzBWdUR3NZgE2dAUz747YMaPjwevaEvyMm2M/Y0cwm2Vj/E+5YVPsPG/ZGLsy1bgh1IymYPZcaBvL4po2BLPgjfEp3PWbYtPG67a0h9rZlgRT48tIl81Lq2Twngg8u833S4HXVagvIiKSJZqwVlIlg/eOPra/7KOWmc0Gtl7+aD95rz8/WNZe5ddoYFWlO5Fhen12Tq/Nzum1KW3vcjzpBtbe8b9+4+gET1H1v7NKBu+lwKRtvt8NeH77B7n7XGAugJktdPeZ6XQvX/TalKbXZ+f02uycXpvSzGxhOZ7X3Y8ox/NWk0puCfoXYLqZTTOzJuAE4NYK/knyqQAABEFJREFU9kdERCQXKpZ5u3uXmZ0K3EFhqdiV7v5QpfojIiKSFxVd5+3utwG39aPJ3HL1pQrotSlNr8/O6bXZOb02pen1qRDzHGz0ISIiIv9QyTFvERERCchc8DazK81shZntcEmYmR1iZuvN7IHi7Stp97FSzGySmf3GzB4xs4fM7NM7eIyZ2XfMbLGZ/c3MXl2Jvqatj69NLZ87zWZ2r5n9v+Lr89UdPKZWz52+vDY1e+5AoSKmmf3VzH6xg/+ryfOm0rK4MclPgEuBq0o85vfuflQ63cmULuBz7n6/mQ0F7jOzO7crKXskML14ex3wfWqj+E1fXhuo3XOnHTjU3TeaWSPwBzO73d3v3uYxtXru9OW1gdo9dwA+DTwCDNvB/9XqeVNRmcu83f13wJpK9yOL3H2Zu99f/PcGCn9ME7d72LHAVV5wNzDCzNKrKVshfXxtalbxfNhaYLSxeNt+wkutnjt9eW1qlpntBrwDuGInD6nJ86bSMhe8++j1xUtct5vZfpXuTCWY2VTgVcA92/3XjsrO1lQQK/HaQA2fO8VLnw8AK4A73V3nTlEfXhuo3XPnEuALwM7qldbseVNJeQze9wNT3P2VwHeBn1e4P6kzsyHAfwOfcfcXt//vHTSpmSyil9emps8dd+929wMoVDM80Mz23+4hNXvu9OG1qclzx8yOAla4+32lHraD+2rivKmk3AVvd39x6yWu4jrxRjNLUgM3V4pjcv8NXOPuN+3gIX0qO1uNenttav3c2crd1wF3AduXoKzZc2ernb02NXzuHAwcY2ZPAdcBh5rZ1ds9pubPm0rIXfA2s3FmZsV/H0jhZ1hd2V6lo/hz/wh4xN2/tZOH3QqcVJwBehCFfW2XpdbJCunLa1Pj586uZjai+O8W4HDg0e0eVqvnTq+vTa2eO+7+RXffzd2nUihh/Wt3/8B2D6vJ86bSMjfb3MyuBQ4BRpvZUuAcChNIcPcfAP8KfNzMuoAtwAleO5VmDgY+CPy9OD4HcBYwGf7v9bkNeDuwGNgM/FsF+lkJfXltavncGQ/MM7N6CoHnBnf/hZl9DGr+3OnLa1PL587L6LypPFVYExERyZncXTYXERGpdQreIiIiOaPgLSIikjMK3iIiIjmj4C0iIpIzCt4iIiI5o+AtIiKSMwreIikws+lm9pSZ7Vn8vrG4ycVule6biOSPgrdICtx9ETAXeFvxrlOBW9x9aeV6JSJ5lbnyqCJV7EHgcDMbCZwCvK7C/RGRnFLmLZKex4G9gXOBb7r7psp2R0TySrXNRVJS3LL0eeAJ4A3u3lPhLolITinzFkmJu3cCLwJnKnCLSBIK3iLpagR+W+lOiEi+KXiLpMTMpgJP1/I+0CIyMDTmLSIikjPKvEVERHJGwVtERCRnFLxFRERyRsFbREQkZxS8RUREckbBW0REJGcUvEVERHJGwVtERCRn/j+8ZFcsoAyg+wAAAABJRU5ErkJ
ggg==\n",
+ "text/plain": [
+ "<Figure size 576x432 with 2 Axes>
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from matplotlib.colors import LogNorm\n", + "plt.figure(figsize=(8,6))\n", + "plt.pcolormesh(gamma_edges, ns_edges, log_lambda, norm=LogNorm())\n", + "cbar = plt.colorbar()\n", + "cbar.set_label(r'$\\log(\\Lambda)$')\n", + "plt.contour(gamma_vals, ns_vals, log_lambda, [np.quantile(log_lambda, 0.95)])\n", + "plt.plot(gamma_best, ns_best, marker='x', color='black', ms=10)\n", + "plt.xlabel(r'$\\gamma$')\n", + "plt.ylabel(r'$n_{\\mathrm{s}}$')\n", + "plt.ylim(ns_min, ns_max)\n", + "plt.xlim(gamma_min, gamma_max)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calculating the significance (local p-value)\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The significance of the source, i.e. the local p-value, can be calculated by generating the test-statistic distribution of background-only data trials, i.e. for zero injected signal events. SkyLLH provides the helper function ``create_trial_data_file`` to do that:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from skyllh.core.analysis_utils import create_trial_data_file" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function create_trial_data_file in module skyllh.core.analysis_utils:\n", + "\n", + "create_trial_data_file(ana, rss, n_trials, mean_n_sig=0, mean_n_sig_null=0, mean_n_bkg_list=None, bkg_kwargs=None, sig_kwargs=None, pathfilename=None, ncpu=None, ppbar=None, tl=None)\n", + " Creates and fills a trial data file with `n_trials` generated trials for\n", + " each mean number of injected signal events from `ns_min` up to `ns_max` for\n", + " a given analysis.\n", + " \n", + " Parameters\n", + " ----------\n", + " ana : instance of Analysis\n", + " The Analysis instance to use for the trial generation.\n", + " rss : RandomStateService\n", + " The RandomStateService instance to use for generating random\n", + " numbers.\n", + " n_trials : int\n", + " The number of trials to perform for each hypothesis test.\n", + " mean_n_sig : ndarray of float | float | 2- or 3-element sequence of float\n", + " The array of mean number of injected signal events (MNOISEs) for which\n", + " to generate trials. If this argument is not a ndarray, an array of\n", + " MNOISEs is generated based on this argument.\n", + " If a single float is given, only this given MNOISEs are injected.\n", + " If a 2-element sequence of floats is given, it specifies the range of\n", + " MNOISEs with a step size of one.\n", + " If a 3-element sequence of floats is given, it specifies the range plus\n", + " the step size of the MNOISEs.\n", + " mean_n_sig_null : ndarray of float | float | 2- or 3-element sequence of\n", + " float\n", + " The array of the fixed mean number of signal events (FMNOSEs) for the\n", + " null-hypothesis for which to generate trials. 
If this argument is not a\n", + " ndarray, an array of FMNOSEs is generated based on this argument.\n", + " If a single float is given, only this given FMNOSEs are used.\n", + " If a 2-element sequence of floats is given, it specifies the range of\n", + " FMNOSEs with a step size of one.\n", + " If a 3-element sequence of floats is given, it specifies the range plus\n", + " the step size of the FMNOSEs.\n", + " mean_n_bkg_list : list of float | None\n", + " The mean number of background events that should be generated for\n", + " each dataset. This parameter is passed to the ``do_trials`` method of\n", + " the ``Analysis`` class. If set to None (the default), the background\n", + " generation method needs to obtain this number itself.\n", + " bkg_kwargs : dict | None\n", + " Additional keyword arguments for the `generate_events` method of the\n", + " background generation method class. An usual keyword argument is\n", + " `poisson`.\n", + " sig_kwargs : dict | None\n", + " Additional keyword arguments for the `generate_signal_events` method\n", + " of the `SignalGenerator` class. An usual keyword argument is\n", + " `poisson`.\n", + " pathfilename : string | None\n", + " Trial data file path including the filename.\n", + " If set to None generated trials won't be saved.\n", + " ncpu : int | None\n", + " The number of CPUs to use.\n", + " ppbar : instance of ProgressBar | None\n", + " The optional instance of the parent progress bar.\n", + " tl: instance of TimeLord | None\n", + " The instance of TimeLord that should be used to measure individual\n", + " tasks.\n", + " \n", + " Returns\n", + " -------\n", + " seed : int\n", + " The seed used to generate the trials.\n", + " mean_n_sig : 1d ndarray\n", + " The array holding the mean number of signal events used to generate the\n", + " trials.\n", + " mean_n_sig_null : 1d ndarray\n", + " The array holding the fixed mean number of signal events for the\n", + " null-hypothesis used to generate the trials.\n", + " trial_data : structured numpy ndarray\n", + " The generated trial data.\n", + "\n" + ] + } + ], + "source": [ + "help(create_trial_data_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "At first we will generate 10k trials and look at the test-statistic distribution. We will time the trial generation using the ``TimeLord`` class." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "from skyllh.core.timing import TimeLord\n", + "tl = TimeLord()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[==========================================================] 100% ELT 0h:07m:31s\n", + "TimeLord: Executed tasks:\n", + "[Generating background events for data set 0.] 0.002 sec/iter (10000)\n", + "[Generating background events for data set 1.] 0.003 sec/iter (10000)\n", + "[Generating background events for data set 2.] 0.003 sec/iter (10000)\n", + "[Generating background events for data set 3.] 0.005 sec/iter (10000)\n", + "[Generating background events for data set 4.] 0.024 sec/iter (10000)\n", + "[Generating pseudo data. ] 0.030 sec/iter (10000)\n", + "[Initializing trial. ] 0.030 sec/iter (10000)\n", + "[Create fitparams dictionary. ] 1.0e-05 sec/iter (593990)\n", + "[Calc fit param dep data fields. ] 2.9e-06 sec/iter (593990)\n", + "[Get sig prob. ] 1.8e-04 sec/iter (593990)\n", + "[Evaluating bkg log-spline. 
] 2.6e-04 sec/iter (593990)\n", + "[Get bkg prob. ] 3.2e-04 sec/iter (593990)\n", + "[Calc PDF ratios. ] 6.2e-05 sec/iter (593990)\n", + "[Calc pdfratio values. ] 8.2e-04 sec/iter (593990)\n", + "[Calc pdfratio value product Ri ] 3.5e-05 sec/iter (593990)\n", + "[Calc logLamds and grads ] 2.9e-04 sec/iter (593990)\n", + "[Evaluate llh-ratio function. ] 0.004 sec/iter (118798)\n", + "[Minimize -llhratio function. ] 0.052 sec/iter (10000)\n", + "[Maximizing LLH ratio function. ] 0.052 sec/iter (10000)\n", + "[Calculating test statistic. ] 3.5e-05 sec/iter (10000)\n" + ] + } + ], + "source": [ + "rss = RandomStateService(seed=1)\n", + "(_, _, _, trials) = create_trial_data_file(\n", + " ana=ana,\n", + " rss=rss,\n", + " n_trials=1e4,\n", + " mean_n_sig=0,\n", + " pathfilename='/home/mwolf/projects/publicdata_ps/txs_bkg_trails.npy',\n", + " ncpu=8,\n", + " tl=tl)\n", + "print(tl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After generating the background trials, we can histogram the test-statistic values and plot the TS distribution." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3deXxU9fX/8dcxqOACKotEAgJClbAIGI2orUhFoYpgf4hrK7UVxa31Z6vUqoj9tVK16letC1ZxB60tIIpWvwKuNChIlUWWAmLYZBFxQ7bz+2Myt8MwM5kkM7kzyfv5eOSRzJ25956EkDOf9Zi7IyIiArBH2AGIiEjuUFIQEZGAkoKIiASUFEREJKCkICIigQZhB1ATzZo187Zt24YdhohIXpk1a9Z6d2+e6Lm8Tgpt27bl/fffDzsMEZG8YmafJHtO3UciIhLIy6RgZgPMbMwXX3wRdigiInVKXiYFd5/s7sOaNGkSdigiInVKXo8piNSWbdu2UV5ezpYtW8IORSRtDRs2pKioiD333DPtc5QURNJQXl7O/vvvT9u2bTGzsMMRqZS7s2HDBsrLy2nXrl3a5+Vl95FIbduyZQtNmzZVQpC8YWY0bdq0yq1bJQWRNCkhSL6pzu9sXiaFms4+GjV5Hmc/NINnylZkODIRkfyWl0khE7OPypZtZNKclRmMSiS7li9fTpcuXWp0jenTp3P66adnKKLMGjp0KM8//3xGrrVw4UIWLlyYkWvVN/VyoHnkgM7MX7U57DBE8oq74+7ssUdevpeUNOlfVySPbN++nQsvvJBu3boxePBgvvnmG2655RaOPvpounTpwrBhw4hWU1yyZAknn3wyRx55JD179uQ///nPLtd677336NGjB0uXLmXdunX07duXnj17cskll3DooYeyfv16li9fTqdOnbjsssvo2bMnn376KePGjaNr16506dKF6667LrjefvvtF3z9/PPPM3ToUCDSArjqqqs47rjjaN++fdAacHeuuOIKiouLOe200/jss8+y/NOTdNTLloJITYyaPC/jLc3iQxozckDnSl+3cOFCHnnkEY4//nguuugi7r//fq644gpuuukmAH7yk5/w4osvMmDAAM4//3xGjBjBmWeeyZYtW9i5cyeffvopAO+++y5XXnklkyZNok2bNlxxxRX06dOH3/72t7zyyiuMGTNml3uOHTuW+++/n1WrVnHdddcxa9YsDjzwQE455RQmTpzIoEGDUsa9evVq3n77bT7++GPOOOMMBg8ezIQJE1i4cCEfffQRa9eupbi4mIsuuqgGP0XJBLUURPJI69atOf744wG44IILePvtt5k2bRqlpaV07dqVqVOnMm/ePL788ktWrlzJmWeeCUQWMe2zzz4ALFiwgGHDhjF58mTatGkDwNtvv80555wDQL9+/TjwwAODex566KEce+yxQKR10bt3b5o3b06DBg04//zzefPNNyuNe9CgQeyxxx4UFxezdu1aAN58803OPfdcCgoKOOSQQ+jTp0+GfkpSE2opiFRROu/osyV+iqGZcdlll/H+++/TunVrbr75ZrZs2RJ0ISVSWFjIli1b+OCDDzjkkEMAUr5+3333Db5O9brY2OLnxu+9994Jr6FpvrknL1sK2hBP6qsVK1YwY8YMAMaNG8cJJ5wAQLNmzfjqq6+C/vrGjRtTVFTExIkTAfjuu+/45ptvADjggAN46aWXuP7665k+fToAJ5xwAs899xwAr776Kp9//nnC+5eWlvLGG2+wfv16duzYwbhx4zjxxBMBOPjgg1mwYAE7d+5kwoQJlX4vP/jBDxg/fjw7duxg9erVTJs2rZo/FcmkvEwK2hBP6qtOnTrx+OOP061bNzZu3Mjw4cO5+OKL6dq1K4MGDeLoo48OXvvkk09yzz330K1bN4477jjWrFkTPHfwwQczefJkLr/8csrKyhg5ciSvvvoqPXv25OWXX6awsJD9999/t/sXFhZy6623ctJJJwUD2AMHDgRg9OjRnH766fTp04fCwsJKv5czzzyTjh070rVrV4YPHx4kFwmXpWoO5rqSkhKvbpGdsx+KvNt69pJemQxJ6qgFCxbQqVOnsMPImu+++46CggIaNGjAjBkzGD58OHPmzAk7rGqLrlE4/PDDQ44kfIl+d81slruXJHq9xhREhBUrVjBkyBB27tzJXnvtxcMPPxx2SBISJQURoWPHjnzwwQdhhyE5IC/HFEREJDuUFEREJKCkICIiASUFEREJ5GVS0OI1EZHsyMukoMVrUt9s2LCB7t270717d1q2bEmrVq2Cx6NGjaJz585069aN7t27U1ZWFp
[... base64-encoded PNG data omitted; the rendered figure shows the histogram of the background TS distribution ('#trials per bin' on a logarithmic scale versus 'TS') with the unblinded TS value of TXS 0506+056 marked by a vertical line ...]\n",
+       "text/plain": [
+       "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "(h, be) = np.histogram(trials['ts'], bins=np.arange(0, np.max(trials['ts'])+0.1, 0.1))\n", + "plt.plot(0.5*(be[:-1]+be[1:]), h, drawstyle='steps-mid', label='background')\n", + "plt.vlines(ts, 1, np.max(h), label=f'TS(TXS 0506+056)={ts:.3f}')\n", + "plt.yscale('log')\n", + "plt.xlabel('TS')\n", + "plt.ylabel('#trials per bin')\n", + "plt.legend()\n", + "pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see that the TS value of the unblinded data for TXS is rather large and 10k trials are not enough to calculate a reliable estimate for the p-value. Hence, we will generate a few more trials. SkyLLH provides also a helper function to extend the trial data file we just created. It is called ``extend_trial_data_file``: " + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "from skyllh.core.analysis_utils import extend_trial_data_file" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function extend_trial_data_file in module skyllh.core.analysis_utils:\n", + "\n", + "extend_trial_data_file(ana, rss, n_trials, trial_data, mean_n_sig=0, mean_n_sig_null=0, mean_n_bkg_list=None, bkg_kwargs=None, sig_kwargs=None, pathfilename=None, **kwargs)\n", + " Appends to the trial data file `n_trials` generated trials for each\n", + " mean number of injected signal events up to `ns_max` for a given analysis.\n", + " \n", + " Parameters\n", + " ----------\n", + " ana : Analysis\n", + " The Analysis instance to use for sensitivity estimation.\n", + " rss : RandomStateService\n", + " The RandomStateService instance to use for generating random\n", + " numbers.\n", + " n_trials : int\n", + " The number of trials the trial data file needs to be extended by.\n", + " trial_data : structured numpy ndarray\n", + " The structured numpy ndarray holding the trials.\n", + " mean_n_sig : ndarray of float | float | 2- or 3-element sequence of float\n", + " The array of mean number of injected signal events (MNOISEs) for which\n", + " to generate trials. If this argument is not a ndarray, an array of\n", + " MNOISEs is generated based on this argument.\n", + " If a single float is given, only this given MNOISEs are injected.\n", + " If a 2-element sequence of floats is given, it specifies the range of\n", + " MNOISEs with a step size of one.\n", + " If a 3-element sequence of floats is given, it specifies the range plus\n", + " the step size of the MNOISEs.\n", + " mean_n_sig_null : ndarray of float | float | 2- or 3-element sequence of\n", + " float\n", + " The array of the fixed mean number of signal events (FMNOSEs) for the\n", + " null-hypothesis for which to generate trials. If this argument is not a\n", + " ndarray, an array of FMNOSEs is generated based on this argument.\n", + " If a single float is given, only this given FMNOSEs are used.\n", + " If a 2-element sequence of floats is given, it specifies the range of\n", + " FMNOSEs with a step size of one.\n", + " If a 3-element sequence of floats is given, it specifies the range plus\n", + " the step size of the FMNOSEs.\n", + " bkg_kwargs : dict | None\n", + " Additional keyword arguments for the `generate_events` method of the\n", + " background generation method class. 
An usual keyword argument is\n", + " `poisson`.\n", + " sig_kwargs : dict | None\n", + " Additional keyword arguments for the `generate_signal_events` method\n", + " of the `SignalGenerator` class. An usual keyword argument is\n", + " `poisson`.\n", + " pathfilename : string | None\n", + " Trial data file path including the filename.\n", + " \n", + " Additional keyword arguments\n", + " ----------------------------\n", + " Additional keyword arguments are passed-on to the ``create_trial_data_file``\n", + " function.\n", + " \n", + " Returns\n", + " -------\n", + " trial_data :\n", + " Trial data file extended by the required number of trials for each\n", + " mean number of injected signal events..\n", + "\n" + ] + } + ], + "source": [ + "help(extend_trial_data_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[==========================================================] 100% ELT 0h:29m:56s\n" + ] + } + ], + "source": [ + "tl = TimeLord()\n", + "rss = RandomStateService(seed=2)\n", + "trials = extend_trial_data_file(\n", + " ana=ana,\n", + " rss=rss,\n", + " n_trials=4e4,\n", + " trial_data=trials,\n", + " pathfilename='/home/mwolf/projects/publicdata_ps/txs_bkg_trails.npy',\n", + " ncpu=8,\n", + " tl=tl)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TimeLord: Executed tasks:\n", + "[Generating background events for data set 0.] 0.002 sec/iter (40000)\n", + "[Generating background events for data set 1.] 0.003 sec/iter (40000)\n", + "[Generating background events for data set 2.] 0.003 sec/iter (40000)\n", + "[Generating background events for data set 3.] 0.005 sec/iter (40000)\n", + "[Generating background events for data set 4.] 0.019 sec/iter (40000)\n", + "[Generating pseudo data. ] 0.027 sec/iter (40000)\n", + "[Initializing trial. ] 0.032 sec/iter (40000)\n", + "[Create fitparams dictionary. ] 1.1e-05 sec/iter (2375320)\n", + "[Calc fit param dep data fields. ] 3.3e-06 sec/iter (2375320)\n", + "[Get sig prob. ] 2.0e-04 sec/iter (2375320)\n", + "[Evaluating bkg log-spline. ] 2.8e-04 sec/iter (2375320)\n", + "[Get bkg prob. ] 3.5e-04 sec/iter (2375320)\n", + "[Calc PDF ratios. ] 6.8e-05 sec/iter (2375320)\n", + "[Calc pdfratio values. ] 8.5e-04 sec/iter (2375320)\n", + "[Calc pdfratio value product Ri ] 3.9e-05 sec/iter (2375320)\n", + "[Calc logLamds and grads ] 3.1e-04 sec/iter (2375320)\n", + "[Evaluate llh-ratio function. ] 0.005 sec/iter (475064)\n", + "[Minimize -llhratio function. ] 0.054 sec/iter (40000)\n", + "[Maximizing LLH ratio function. ] 0.054 sec/iter (40000)\n", + "[Calculating test statistic. ] 3.7e-05 sec/iter (40000)\n" + ] + } + ], + "source": [ + "print(tl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The local p-value is defined as the fraction of background trials with TS value greater than the unblinded TS value of the source. 
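In other words, p_local = N(TS_trial > TS_unblinded) / N_trials. The following cell computes this fraction from the extended set of trials and reports its negative decadic logarithm, -log10(p_local).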
" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-log10(p_local) = 2.93\n" + ] + } + ], + "source": [ + "minus_log10_pval = -np.log10(len(trials[trials['ts'] > ts]) / len(trials))\n", + "print(f'-log10(p_local) = {minus_log10_pval:.2f}')" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3deXzU9bX/8dcxiOCCyqaRgARBSwANmBqp3opeUVAR6A9BsI9KbQUXtO1tq5YuiF2ktbW91VqFiksVxNqLiHKtbZHiQoNgc2WTpUAxECGAgBv7+f0x+X47mUySSchkZjLv5+PBI5nvzPc7J+M4Zz7rMXdHREQE4KhUByAiIulDSUFEREJKCiIiElJSEBGRkJKCiIiEWqQ6gCPRvn1779q1a6rDEBHJKEuXLt3u7h3i3ZfRSaFr164sWbIk1WGIiGQUM/tXTfep+0hEREJKCiIiElJSEBGRUEaPKYg0lQMHDlBWVsbevXtTHYpIwlq1akVeXh5HH310wucoKYgkoKysjBNOOIGuXbtiZqkOR6RO7s6OHTsoKysjPz8/4fPUfSSSgL1799KuXTslBMkYZka7du3q3brNyKRgZkPMbOru3btTHYpkESUEyTQNec9mZFJw97nuPu7EE09s0PmT565g1COLmFGyqZEjExHJbBmZFBpDyYadzCndnOowRBK2ceNGevfuHfe+1atXs3r16jqvsWDBAq666qrGDq1RjB07lueeey7VYWS9rBxonjSkFyu37El1GCIZxd1xd446Kmu/S2YF/dcVySAHDx7k+uuv5+yzz2bEiBF88skn3HPPPYwYMYIhQ4Ywbtw4gmqK69at49JLL+Wcc86hX79+/POf/6xyrbfeeou+ffuyfv16KioqGDhwIP369WP8+PGcfvrpbN++nY0bN9KzZ09uueUW+vXrx3vvvcfMmTPp06cPvXv35s477wyvd/zxx4e/P/fcc4wdOxaItABuv/12Pve5z9GtW7ewNeDuTJgwgYKCAq688kq2bduW5FdPEpGVLQWRIzF57opGb2kWnNaGSUN61fm41atX8+ijj3LBBRdwww038NBDDzFhwgRGjRoFwI9+9CNefPFFhgwZwnXXXcddd93F8OHD2bt3L4cPH+a9994D4M033+S2225jzpw5dOnShQkTJnDJJZfwne98h5dffpmpU6dWec7HHnuMhx56iC1btnDnnXeydOlSTj75ZC677DKef/55hg0bVmvc5eXlvP7667z77rtcffXVjBgxgtmzZ7N69WqWLVvG1q1bKSgo4IYbbjiCV1Eag1oKIhmkc+fOXHDBBQB88Ytf5PXXX+fVV19l5MiRDBkyhPnz57NixQo+/PBDNm/ezPDhw4HIIqZjjz0WgFWrVjFu3Djmzp1Lly5dAHj99de59tprARg0aBAnn3xy+Jynn346559/PhBpXQwYMIAOHTrQokULrrvuOhYuXFhn3MOGDeOoo46ioKCArVu3ArBw4UJGjx5NTk4Op512GpdcckkjvUpyJNRSEKmnRL7RJ0vsFEMz45ZbbmHWrFnk5uYyc+ZM9u7dG3YhxZObm8vevXv5xz/+wWmnnQZQ6+OPO+648PfaHhcdW+zc+GOOOSbuNTTNN/2opSCSQTZt2sSiRYsAmDlzJhdeeCEAJ598Mh9//HHYX9+mTRvy8vJ4/vnnAdi3bx+ffPIJACeddBIvvfQSEydOZMGCBQBceOGFPPvsswC88sorfPDBB3Gfv7i4mL/97W9s376dQ4cOMXPmTC666CIATjnlFFatWsXhw4eZPXt2nX/L5z//eZ555hkOHTpEeXk5r776agNfFWlMaimIZJCePXvyxBNPMH78eHr06MHNN9/MBx98wNVXX02nTp347Gc/Gz7297//PePHj+cHP/gBRx99NH/4wx/C+0455RTmzp3L4MGDmT59OpMmTWL06NHMmjWLiy66iNzcXE444QQ++uijKs+fm5vLvffey8UXX4y7c8UVVzB06FAApkyZwlVXXUXnzp3p3bt3tXNjDR8+nPnz59OnTx/OPPPMMLlIalltzcF0V1RU5A0tsjPqkci3rVnj+zdmSNJMrVq1ip49e6Y6jBoFaxTOOuusBp2/b98+cnJyaNGiBYsWLeLmm2+mtLS0MUOUFIn33jWzpe5eFO/xadVSMLPjgIXAJHd/MdXxiGSLTZs2MXLkSA4fPkzLli2ZNm1aqkOSFElqUjCz6cBVwDZ37x11fBDw30AO8Dt3n1J5153As8mMSUSq69GjB//4xz9SHYakgWQPND8ODIo+YGY5wG+AwUABMNrMCszsUmAlsDXJMYmISA2S2lJw94Vm1jXm8HnAOndfD2BmzwBDgeOB44gkik/NbJ67H469ppmNA8YB4RxrERFpHKkYU+gEvBd1uwwodvcJAGY2FtgeLyEAuPtUYCpEBpqTG6qISHZJRVKIt1ol/HB398ebLhQREYmWisVrZUDnqNt5wJb6XEBFdkREkiMVSeEtoIeZ5ZtZS+Ba4IX6XOBIi+yIZJodO3ZQWFhIYWEhp556Kp06dQpvT548mauuuoqrr76awsJCSkpKwvNGjBjB+vXrKS4uprCwkC5dutChQ4fw3GXLlnHGGWewdu1aAA4cOECfPn3Ca/z4xz+mV69enH322dWuHdi5cycDBw6kR48eDBw4MFwNvXHjRlq3bh0+10033RSes3TpUvr06UP37t25/fbbq2x98eyzz1JQUECvXr0YM2ZMwq/Rhg0bKC4upkePHowaNYr9+/cDkRoSJ554YhjHPffcE56za9cuRowYwWc+8xl69uwZrhb/1re+xfz58xN+7nfffZf+/ftzzDHH8POf/zw8vnfvXs477zzOOeccevXqxaRJk+p1fuDQoUP07du3Si2Mu+++u8r7YN68eQnHW6tgj/Rk/ANmAuXAASIthK9UHr8CWAP8E/huA647BJjavXt3b6iRD7/pIx9+s8HnS3ZZuXJlqkMITZo0ye+77z53d3/zzTf9/PPP93feecffffddr6io8M2bN7u7+/Lly33YsGFVzn3sscf81
[... base64-encoded PNG data omitted; the rendered figure shows the re-made histogram of the background TS distribution after extending the trials ('#trials per bin' on a logarithmic scale versus 'TS') with the unblinded TS value of TXS 0506+056 marked by a vertical line ...]\n",
+       "text/plain": [
+       "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "(h, be) = np.histogram(trials['ts'], bins=np.arange(0, np.max(trials['ts'])+0.1, 0.1))\n", + "plt.plot(0.5*(be[:-1]+be[1:]), h, drawstyle='steps-mid', label='background')\n", + "plt.vlines(ts, 1, np.max(h), label=f'TS(TXS 0506+056)={ts:.3f}')\n", + "plt.yscale('log')\n", + "plt.xlabel('TS')\n", + "plt.ylabel('#trials per bin')\n", + "plt.legend()\n", + "pass" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/user_manual.pdf b/doc/user_manual.pdf index 2d393319d3..fe2addda01 100644 Binary files a/doc/user_manual.pdf and b/doc/user_manual.pdf differ diff --git a/doc/user_manual.tex b/doc/user_manual.tex index 4b39ca5e22..f7a80bedee 100644 --- a/doc/user_manual.tex +++ b/doc/user_manual.tex @@ -880,6 +880,84 @@ \section{Implemented Log-Likelihood Models} % % The model utilizes a two-component likelihood function with signal and background events. + +\appendix + +\section{Inverse CDF sampling of a bounded power-law} +When working with the 10 years public release of IceCube's data, the generation +of signal events requires as first step the generation of the true neutrino +energy. In order to sample energies from a power-law, we use the technique of +the inverse CDF sampling. When we are dealing with this specific release of +IceCube's data, we need to sample the events from a 5 dimensional histogram +which is giving us the probability of a certain reconstruction given a true +neutrino energy $E_{\nu}$ and a true neutrino declination $\delta_{\nu}$. + +The true neutrino energies stored in the 5-dimensional histogram are binned +starting from $\log(E_{\nu}^{min}/\textrm{GeV})=2$ up to $\log(E_{\nu}^{max}/\textrm{GeV})=9$, +which means that we can only generate energies in that energy range. In practice, +we have to deal with a bounded power-law. + +\begin{equation} + \Phi(E_{\nu} | \phi_0, E_0, \gamma) = \phi_0 \left(\frac{E_{\nu}}{E_0}\right)^{-\gamma},\ \ \ \ E_{\nu} \in [E_{\nu}^{min}, E_{\nu}^{max}] +\end{equation} + +where $\phi_0$ and $E_0$ are normalization factors, and $\gamma$ is the spectral +index of the power-law. + +We need to consider two separate cases now: +\begin{enumerate} + \item $\gamma = 1$:\\ + In this case the power-law reads: + \begin{equation} + \Phi(E_{\nu} | \phi_0, E_0, \gamma=1) = \phi_0 \left(\frac{E_0}{E_{\nu}}\right). 
+    \end{equation}
+    The correct normalization for the bounded power-law in this case is:
+    \begin{equation}
+        N = \phi_0 \int_{E_{\nu}^{min}}^{E_{\nu}^{max}} \left(\frac{E_0}{E_{\nu}}\right) dE_{\nu} = \phi_0 E_0 \log \left(\frac{E_{\nu}^{max}}{E_{\nu}^{min}}\right)
+    \end{equation}
+    and the cumulative distribution function (CDF) is given by:
+    \begin{equation}
+        x = \phi_0 \int_{E_{\nu}^{min}}^{E_{\nu}} \left(\frac{E_0}{E_{\nu}^{'}}\right) dE_{\nu}^{'} = \phi_0 E_0 \log \left(\frac{E_{\nu}}{E_{\nu}^{min}}\right).
+    \end{equation}
+    Therefore, the correctly normalized CDF is given by:
+    \begin{equation}\label{normed_cdf_gamma1}
+        x^{'} = \frac{x}{N} = \frac{\log \left(E_{\nu}/E_{\nu}^{min}\right)}{\log \left(E_{\nu}^{max}/E_{\nu}^{min}\right)},
+    \end{equation}
+    where the constant factor $\phi_0 E_0$ cancels out, and $x^{'} \in [0,1]$.\\
+    Inverting Eq. \ref{normed_cdf_gamma1} gives the energy as a function of the CDF value:
+    \begin{equation}
+        E_{\nu} = E_{\nu}^{min}\, e^{x^{'}\log(E_{\nu}^{max}/E_{\nu}^{min})}.
+    \end{equation}
+
+    \item $\gamma \neq 1$:\\
+    In this case the power-law reads:
+    \begin{equation}
+        \Phi(E_{\nu} | \phi_0, E_0, \gamma \neq 1) = \phi_0 \left(\frac{E_{\nu}}{E_0}\right)^{-\gamma}.
+    \end{equation}
+    The correct normalization for the bounded power-law in this case is:
+    \begin{equation}
+        N = \phi_0 \int_{E_{\nu}^{min}}^{E_{\nu}^{max}} \left(\frac{E_{\nu}}{E_0}\right)^{-\gamma} dE_{\nu} =
+        \phi_0 \frac{E_0^{\gamma}}{1-\gamma} \left[(E_{\nu}^{max})^{1-\gamma}-(E_{\nu}^{min})^{1-\gamma}\right]
+    \end{equation}
+    and the cumulative distribution function (CDF) is given by:
+    \begin{equation}
+        x = \phi_0 \int_{E_{\nu}^{min}}^{E_{\nu}} \left(\frac{E_{\nu}^{'}}{E_0}\right)^{-\gamma} dE_{\nu}^{'} =
+        \phi_0 \frac{E_0^{\gamma}}{1-\gamma} \left[(E_{\nu})^{1-\gamma}-(E_{\nu}^{min})^{1-\gamma}\right].
+    \end{equation}
+    Therefore, the correctly normalized CDF is given by:
+    \begin{equation}\label{normed_cdf_gammanot1}
+        x^{'} = \frac{x}{N} = \frac{(E_{\nu})^{1-\gamma}-(E_{\nu}^{min})^{1-\gamma}}{(E_{\nu}^{max})^{1-\gamma}-(E_{\nu}^{min})^{1-\gamma}},
+    \end{equation}
+    where the constant factor $\phi_0 E_0^{\gamma}/(1-\gamma)$ cancels out, and $x^{'} \in [0,1]$.\\
+    Inverting Eq. \ref{normed_cdf_gammanot1} gives the energy as a function of the CDF value:
+    \begin{equation}
+        E_{\nu} = \big\{ x^{'}\left[(E_{\nu}^{max})^{1-\gamma}-(E_{\nu}^{min})^{1-\gamma}\right] + (E_{\nu}^{min})^{1-\gamma} \big\}^{\frac{1}{1-\gamma}}.
+    \end{equation}
+\end{enumerate}
+
+Hence, one can randomly draw energies according to the power-law distribution by generating uniformly
+distributed numbers $x^{'}$ between 0 and 1 and passing them through the inverse CDF formula, taking
+care to apply the correct normalization.
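+
+As an illustration of the formulas above (a minimal sketch, not the actual
+SkyLLH implementation), energies can be drawn with a few lines of Python,
+where the helper name \texttt{sample\_bounded\_powerlaw} and the NumPy random
+generator \texttt{rng} are chosen freely for this example:
+
+\begin{verbatim}
+import numpy as np
+
+def sample_bounded_powerlaw(rng, n, gamma, e_min, e_max):
+    # Draw n uniformly distributed numbers x' in [0, 1).
+    x = rng.uniform(size=n)
+    if gamma == 1:
+        # E = E_min * exp(x' * log(E_max / E_min))
+        return e_min * np.exp(x * np.log(e_max / e_min))
+    # E = (x' * (E_max^(1-gamma) - E_min^(1-gamma))
+    #      + E_min^(1-gamma))^(1 / (1-gamma))
+    a = e_min**(1. - gamma)
+    b = e_max**(1. - gamma)
+    return (x*(b - a) + a)**(1. / (1. - gamma))
+
+rng = np.random.default_rng(seed=42)
+energies = sample_bounded_powerlaw(rng, 10000, 2.0, 1e2, 1e9)
+\end{verbatim}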
+ \bibliographystyle{unsrt} \bibliography{biblio} diff --git a/skyllh/analyses/__init__.py b/skyllh/analyses/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/skyllh/analyses/i3/__init__.py b/skyllh/analyses/i3/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/skyllh/analyses/i3/publicdata_ps/__init__.py b/skyllh/analyses/i3/publicdata_ps/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/skyllh/analyses/i3/publicdata_ps/aeff.py b/skyllh/analyses/i3/publicdata_ps/aeff.py new file mode 100644 index 0000000000..798b758983 --- /dev/null +++ b/skyllh/analyses/i3/publicdata_ps/aeff.py @@ -0,0 +1,378 @@ +# -*- coding: utf-8 -*- + +import numpy as np + +from scipy import interpolate +from scipy import integrate + +from skyllh.core.binning import ( + get_bincenters_from_binedges, + get_bin_indices_from_lower_and_upper_binedges, +) +from skyllh.core.storage import create_FileLoader + +from skyllh.analyses.i3.publicdata_ps.utils import FctSpline2D + + +def load_effective_area_array(pathfilenames): + """Loads the (nbins_decnu, nbins_log10enu)-shaped 2D effective + area array from the given data file. + + Parameters + ---------- + pathfilename : str | list of str + The file name of the data file. + + Returns + ------- + aeff_decnu_log10enu : (nbins_decnu, nbins_log10enu)-shaped 2D ndarray + The ndarray holding the effective area for each + (dec_nu,log10(E_nu/GeV)) bin. + decnu_binedges_lower : (nbins_decnu,)-shaped ndarray + The ndarray holding the lower bin edges of the dec_nu axis. + decnu_binedges_upper : (nbins_decnu,)-shaped ndarray + The ndarray holding the upper bin edges of the dec_nu axis. + log10_enu_binedges_lower : (nbins_log10enu,)-shaped ndarray + The ndarray holding the lower bin edges of the log10(E_nu/GeV) axis. + log10_enu_binedges_upper : (nbins_log10enu,)-shaped ndarray + The ndarray holding the upper bin edges of the log10(E_nu/GeV) axis. + """ + loader = create_FileLoader(pathfilenames=pathfilenames) + data = loader.load_data() + renaming_dict = { + 'log10(E_nu/GeV)_min': 'log10_enu_min', + 'log10(E_nu/GeV)_max': 'log10_enu_max', + 'Dec_nu_min[deg]': 'decnu_min', + 'Dec_nu_max[deg]': 'decnu_max', + 'A_Eff[cm^2]': 'a_eff' + } + data.rename_fields(renaming_dict, must_exist=True) + + # Convert the true neutrino declination from degrees to radians. + data['decnu_min'] = np.deg2rad(data['decnu_min']) + data['decnu_max'] = np.deg2rad(data['decnu_max']) + + # Determine the binning for energy and declination. + log10_enu_binedges_lower = np.unique(data['log10_enu_min']) + log10_enu_binedges_upper = np.unique(data['log10_enu_max']) + decnu_binedges_lower = np.unique(data['decnu_min']) + decnu_binedges_upper = np.unique(data['decnu_max']) + + if(len(log10_enu_binedges_lower) != len(log10_enu_binedges_upper)): + raise ValueError('Cannot extract the log10(E/GeV) binning of the ' + 'effective area from data file "{}". The number of lower and upper ' + 'bin edges is not equal!'.format(str(loader.pathfilename_list))) + if(len(decnu_binedges_lower) != len(decnu_binedges_upper)): + raise ValueError('Cannot extract the dec_nu binning of the effective ' + 'area from data file "{}". The number of lower and upper bin edges ' + 'is not equal!'.format(str(loader.pathfilename_list))) + + nbins_log10_enu = len(log10_enu_binedges_lower) + nbins_decnu = len(decnu_binedges_lower) + + # Construct the 2d array for the effective area. 
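+    # Each row of the data file corresponds to one (dec_nu, log10(E_nu)) bin;
+    # the bin center of each row is digitized into the corresponding array
+    # indices below, so bins that are missing in the file keep an effective
+    # area of zero.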
+ aeff_decnu_log10enu = np.zeros( + (nbins_decnu, nbins_log10_enu), dtype=np.double) + + decnu_idx = np.digitize( + 0.5*(data['decnu_min'] + + data['decnu_max']), + decnu_binedges_lower) - 1 + log10enu_idx = np.digitize( + 0.5*(data['log10_enu_min'] + + data['log10_enu_max']), + log10_enu_binedges_lower) - 1 + + aeff_decnu_log10enu[decnu_idx, log10enu_idx] = data['a_eff'] + + return ( + aeff_decnu_log10enu, + decnu_binedges_lower, + decnu_binedges_upper, + log10_enu_binedges_lower, + log10_enu_binedges_upper + ) + + +class PDAeff(object): + """This class provides a representation of the effective area provided by + the public data. + """ + def __init__( + self, pathfilenames, src_dec=None, + min_log10enu=None, max_log10enu=None, + **kwargs): + """Creates an effective area instance by loading the effective area + data from the given file. + + Parameters + ---------- + pathfilenames : str | list of str + The path file names of the effective area data file(s) which should + be used for this public data effective area instance. + src_dec : float | None + The source declination in radians for which detection probabilities + should get pre-calculated using the ``get_detection_prob_for_decnu`` + method. + min_log10enu : float | None + The minimum log10(E_nu/GeV) value that should be used for + calculating the detection probability. + If None, the lowest available neutrino energy bin edge of the + effective area is used. + max_log10enu : float | None + The maximum log10(E_nu/GeV) value that should be used for + calculating the detection probability. + If None, the highest available neutrino energy bin edge of the + effective area is used. + """ + super().__init__(**kwargs) + + ( + self._aeff_decnu_log10enu, + self._decnu_binedges_lower, + self._decnu_binedges_upper, + self._log10_enu_binedges_lower, + self._log10_enu_binedges_upper + ) = load_effective_area_array(pathfilenames) + + # Note: self._aeff_decnu_log10enu is numpy 2D ndarray of shape + # (nbins_decnu, nbins_log10enu). + + # Cut the energies where all effective areas are zero. + m = np.sum(self._aeff_decnu_log10enu, axis=0) > 0 + self._aeff_decnu_log10enu = self._aeff_decnu_log10enu[:,m] + self._log10_enu_binedges_lower = self._log10_enu_binedges_lower[m] + self._log10_enu_binedges_upper = self._log10_enu_binedges_upper[m] + + self._decnu_binedges = np.concatenate( + (self._decnu_binedges_lower, + self._decnu_binedges_upper[-1:]) + ) + self._log10_enu_binedges = np.concatenate( + (self._log10_enu_binedges_lower, + self._log10_enu_binedges_upper[-1:]) + ) + + # Pre-calculate detection probabilities for a certain neutrino + # declination if requested. + if src_dec is not None: + if min_log10enu is None: + min_log10enu = self._log10_enu_binedges_lower[0] + else: + min_log10enu = max( + self._log10_enu_binedges_lower[0], + min_log10enu) + + if max_log10enu is None: + max_log10enu = self._log10_enu_binedges_upper[-1] + else: + max_log10enu = min( + self._log10_enu_binedges_upper[-1], + max_log10enu) + + m = ( + (self.log10_enu_bincenters >= min_log10enu) & + (self.log10_enu_bincenters < max_log10enu) + ) + bin_centers = self.log10_enu_bincenters[m] + low_bin_edges = self._log10_enu_binedges_lower[m] + high_bin_edges = self._log10_enu_binedges_upper[m] + + # Get the detection probability P(E_nu | sin(dec)) per bin. 
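+            # Each entry of det_prob is the probability to detect a neutrino
+            # of the given source declination within one energy bin,
+            # normalized over the selected [min_log10enu, max_log10enu] range.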
+ self.det_prob = self.get_detection_prob_for_decnu( + src_dec, + 10**low_bin_edges, 10**high_bin_edges, + 10**low_bin_edges[0], 10**high_bin_edges[-1] + ) + + @property + def decnu_binedges(self): + """(read-only) The bin edges of the neutrino declination axis in + radians. + """ + return self._decnu_binedges + + @property + def sin_decnu_binedges(self): + """(read-only) The sin of the bin edges of the neutrino declination + in radians. + """ + return np.sin(self._decnu_binedges) + + @property + def decnu_bincenters(self): + """(read-only) The bin center values of the neutrino declination axis in + radians. + """ + return get_bincenters_from_binedges(self._decnu_binedges) + + @property + def n_decnu_bins(self): + """(read-only) The number of bins of the neutrino declination axis. + """ + return len(self._decnu_binedges) - 1 + + @property + def log10_enu_binedges(self): + """(read-only) The bin edges of the log10(E_nu/GeV) neutrino energy + axis. + """ + return self._log10_enu_binedges + + @property + def log10_enu_bincenters(self): + """(read-only) The bin center values of the log10(E_nu/GeV) neutrino + energy axis. + """ + return get_bincenters_from_binedges(self._log10_enu_binedges) + + @property + def n_log10_enu_bins(self): + """(read-only) The number of bins of the log10 neutrino energy axis. + """ + return len(self._log10_enu_binedges) - 1 + + @property + def aeff_decnu_log10enu(self): + """(read-only) The effective area in cm^2 as (n_decnu,n_log10enu)-shaped + 2D numpy ndarray. + """ + return self._aeff_decnu_log10enu + + def create_sin_decnu_log10_enu_spline(self): + """DEPRECATED! + Creates a FctSpline2D object representing a 2D spline of the + effective area in sin(dec_nu)-log10(E_nu/GeV)-space. + + Returns + ------- + spl : FctSpline2D instance + The FctSpline2D instance representing a spline in the + sin(dec_nu)-log10(E_nu/GeV)-space. + """ + spl = FctSpline2D( + self._aeff_decnu_log10enu, + self.sin_decnu_binedges, + self.log10_enu_binedges + ) + return spl + + def get_aeff_for_decnu(self, decnu): + """Retrieves the effective area as function of log10_enu. + + Parameters + ---------- + decnu : float + The true neutrino declination. + + Returns + ------- + aeff : (n,)-shaped numpy ndarray + The effective area in cm^2 for the given true neutrino declination + as a function of log10 true neutrino energy. + """ + decnu_idx = np.digitize(decnu, self._decnu_binedges) - 1 + + aeff = self._aeff_decnu_log10enu[decnu_idx] + + return aeff + + def get_detection_prob_for_decnu( + self, decnu, enu_min, enu_max, enu_range_min, enu_range_max): + """Calculates the detection probability for given true neutrino energy + ranges for a given neutrino declination. + + Parameters + ---------- + decnu : float + The neutrino declination in radians. + enu_min : float | ndarray of float + The minimum energy in GeV. + enu_max : float | ndarray of float + The maximum energy in GeV. + enu_range_min : float + The minimum energy in GeV of the entire energy range. + enu_range_max : float + The maximum energy in GeV of the entire energy range. + + Returns + ------- + det_prob : ndarray of float + The neutrino energy detection probabilities for the given true + enegry ranges. + """ + enu_binedges = np.power(10, self.log10_enu_binedges) + + # Get the bin indices for the lower and upper energy range values. 
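+        # (The detection probability below is computed as the integral of a
+        # linear spline of dAeff/dE over [enu_min, enu_max], divided by its
+        # integral over the full [enu_range_min, enu_range_max] range.)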
+ (lidx,) = get_bin_indices_from_lower_and_upper_binedges( + enu_binedges[:-1], + enu_binedges[1:], + np.array([enu_range_min]) + ) + if enu_range_max >= enu_binedges[-1]: + uidx = len(enu_binedges)-1 + else: + (uidx,) = get_bin_indices_from_lower_and_upper_binedges( + enu_binedges[:-1], + enu_binedges[1:], + np.array([enu_range_max]) + ) + # Note: The get_bin_indices_from_lower_and_upper_binedges function + # is based on the lower edges. So by definition the upper bin + # index is one too large. + # uidx -= 1 + + aeff = self.get_aeff_for_decnu(decnu) + aeff = aeff[lidx:uidx] + enu_binedges = enu_binedges[lidx:uidx+1] + + dE = np.diff(enu_binedges) + + daeff_dE = aeff / dE + + # Create a spline representation that spans the entire enu range. + x = np.empty((len(enu_binedges)+1,), dtype=np.double) + x[0] = enu_binedges[0] + x[1:-1] = get_bincenters_from_binedges(enu_binedges) + x[-1] = enu_binedges[-1] + + y = np.empty((len(enu_binedges)+1,), dtype=np.double) + y[0] = daeff_dE[0] + y[1:-1] = daeff_dE + y[-1] = daeff_dE[-1] + + spl = interpolate.splrep( + x, + y, + k=1, + s=0 + ) + + def _eval_spl_func(x): + return interpolate.splev(x, spl, der=0, ext=1) + + norm = integrate.quad( + _eval_spl_func, + enu_range_min, + enu_range_max, + limit=200, + full_output=1 + )[0] + + enu_min = np.atleast_1d(enu_min) + enu_max = np.atleast_1d(enu_max) + + det_prob = np.empty((len(enu_min),), dtype=np.double) + for i in range(len(enu_min)): + integral = integrate.quad( + _eval_spl_func, + enu_min[i], + enu_max[i], + limit=200, + full_output=1 + )[0] + + det_prob[i] = integral / norm + + return det_prob + diff --git a/skyllh/analyses/i3/publicdata_ps/backgroundpdf.py b/skyllh/analyses/i3/publicdata_ps/backgroundpdf.py new file mode 100644 index 0000000000..492914272e --- /dev/null +++ b/skyllh/analyses/i3/publicdata_ps/backgroundpdf.py @@ -0,0 +1,450 @@ +# -*- coding: utf-8 -*- + +import numpy as np + +from skyllh.core.binning import ( + BinningDefinition, + UsesBinning, +) +from skyllh.core.pdf import ( + EnergyPDF, + IsBackgroundPDF, + PDFAxis +) +from skyllh.core.storage import DataFieldRecordArray +from skyllh.core.timing import TaskTimer +from skyllh.core.smoothing import ( + UNSMOOTH_AXIS, + SmoothingFilter, + HistSmoothingMethod, + NoHistSmoothingMethod, + NeighboringBinHistSmoothingMethod +) +from skyllh.core.timing import TaskTimer + +from scipy.stats import gaussian_kde + + +class PDEnergyPDF(EnergyPDF, UsesBinning): + """This is the base class for IceCube specific energy PDF models. + IceCube energy PDFs depend solely on the energy and the + zenith angle, and hence, on the declination of the event. + + The IceCube energy PDF is modeled as a 1d histogram in energy, + but for different sin(declination) bins, hence, stored as a 2d histogram. + """ + + _KDE_BW_NORTH = 0.4 + _KDE_BW_SOUTH = 0.32 + + def __init__(self, data_logE, data_sinDec, data_mcweight, data_physicsweight, + logE_binning, sinDec_binning, smoothing_filter, kde_smoothing=False): + """Creates a new IceCube energy PDF object. + + Parameters + ---------- + data_logE : 1d ndarray + The array holding the log10(E) values of the events. + data_sinDec : 1d ndarray + The array holding the sin(dec) values of the events. + data_mcweight : 1d ndarray + The array holding the monte-carlo weights of the events. + The final data weight will be the product of data_mcweight and + data_physicsweight. + data_physicsweight : 1d ndarray + The array holding the physics weights of the events. 
+ The final data weight will be the product of data_mcweight and + data_physicsweight. + logE_binning : BinningDefinition + The binning definition for the log(E) axis. + sinDec_binning : BinningDefinition + The binning definition for the sin(declination) axis. + smoothing_filter : SmoothingFilter instance | None + The smoothing filter to use for smoothing the energy histogram. + If None, no smoothing will be applied. + kde_smoothing : bool + Apply a kde smoothing to the energy pdf for each bin in sin(dec). + This is useful for signal injections, because it ensures that the + background is not zero when injecting high energy events. + Default: False. + """ + super(PDEnergyPDF, self).__init__() + + # Define the PDF axes. + self.add_axis(PDFAxis(name='log_energy', + vmin=logE_binning.lower_edge, + vmax=logE_binning.upper_edge)) + self.add_axis(PDFAxis(name='sin_dec', + vmin=sinDec_binning.lower_edge, + vmax=sinDec_binning.upper_edge)) + + self.add_binning(logE_binning, 'log_energy') + self.add_binning(sinDec_binning, 'sin_dec') + + # Create the smoothing method instance tailored to the energy PDF. + # We will smooth only the first axis (logE). + if((smoothing_filter is not None) and + (not isinstance(smoothing_filter, SmoothingFilter))): + raise TypeError( + 'The smoothing_filter argument must be None or an instance of SmoothingFilter!') + if(smoothing_filter is None): + self.hist_smoothing_method = NoHistSmoothingMethod() + else: + self.hist_smoothing_method = NeighboringBinHistSmoothingMethod( + (smoothing_filter.axis_kernel_array, UNSMOOTH_AXIS)) + + # We have to figure out, which histogram bins are zero due to no + # monte-carlo coverage, and which due to zero physics model + # contribution. + + # Create a 2D histogram with only the MC events to determine the MC + # coverage. + (h, bins_logE, bins_sinDec) = np.histogram2d( + data_logE, data_sinDec, + bins=[ + logE_binning.binedges, sinDec_binning.binedges], + range=[ + logE_binning.range, sinDec_binning.range], + density=False) + h = self._hist_smoothing_method.smooth(h) + self._hist_mask_mc_covered = h > 0 + + # Select the events which have MC coverage but zero physics + # contribution, i.e. the physics model predicts zero contribution. + mask = data_physicsweight == 0. + + # Create a 2D histogram with only the MC events that have zero physics + # contribution. Note: By construction the zero physics contribution bins + # are a subset of the MC covered bins. + (h, bins_logE, bins_sinDec) = np.histogram2d( + data_logE[mask], data_sinDec[mask], + bins=[ + logE_binning.binedges, sinDec_binning.binedges], + range=[ + logE_binning.range, sinDec_binning.range], + density=False) + h = self._hist_smoothing_method.smooth(h) + self._hist_mask_mc_covered_zero_physics = h > 0 + + if kde_smoothing: + # If a bandwidth is passed, apply a KDE-based smoothing with the given + # bw parameter as bandwidth for the fit. 
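+            # (Note: kde_smoothing is a boolean flag rather than a bandwidth
+            # value; the bandwidths actually used are the class constants
+            # _KDE_BW_NORTH and _KDE_BW_SOUTH, selected per sin(dec) bin.)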
+ if not isinstance(kde_smoothing, bool): + raise ValueError( + "The bandwidth parameter must be True or False!") + kde_pdf = np.empty( + (len(sinDec_binning.bincenters),), dtype=object) + data_logE_masked = data_logE[~mask] + data_sinDec_masked = data_sinDec[~mask] + for i in range(len(sinDec_binning.bincenters)): + sindec_mask = np.logical_and( + data_sinDec_masked >= sinDec_binning.binedges[i], + data_sinDec_masked < sinDec_binning.binedges[i+1] + ) + this_energy = data_logE_masked[sindec_mask] + if sinDec_binning.binedges[i] >= 0: + kde_pdf[i] = gaussian_kde( + this_energy, bw_method=self._KDE_BW_NORTH) + else: + kde_pdf[i] = gaussian_kde( + this_energy, bw_method=self._KDE_BW_SOUTH) + h = np.vstack( + [kde_pdf[i].evaluate(logE_binning.bincenters) + for i in range(len(sinDec_binning.bincenters))]).T + + else: + # Create a 2D histogram with only the data which has physics + # contribution. We will do the normalization along the logE + # axis manually. + data_weights = data_mcweight[~mask] * data_physicsweight[~mask] + (h, bins_logE, bins_sinDec) = np.histogram2d( + data_logE[~mask], data_sinDec[~mask], + bins=[ + logE_binning.binedges, sinDec_binning.binedges], + weights=data_weights, + range=[ + logE_binning.range, sinDec_binning.range], + density=False) + + # Calculate the normalization for each logE bin. Hence we need to sum + # over the logE bins (axis 0) for each sin(dec) bin and need to divide + # by the logE bin widths along the sin(dec) bins. The result array norm + # is a 2D array of the same shape as h. + norms = np.sum(h, axis=(0,))[np.newaxis, ...] * \ + np.diff(logE_binning.binedges)[..., np.newaxis] + h /= norms + h = self._hist_smoothing_method.smooth(h) + + self._hist_logE_sinDec = h + + @property + def hist_smoothing_method(self): + """The HistSmoothingMethod instance defining the smoothing filter of the + energy PDF histogram. + """ + return self._hist_smoothing_method + + @hist_smoothing_method.setter + def hist_smoothing_method(self, method): + if(not isinstance(method, HistSmoothingMethod)): + raise TypeError( + 'The hist_smoothing_method property must be an instance of HistSmoothingMethod!') + self._hist_smoothing_method = method + + @property + def hist(self): + """(read-only) The 2D logE-sinDec histogram array. + """ + return self._hist_logE_sinDec + + @property + def hist_mask_mc_covered(self): + """(read-only) The boolean ndarray holding the mask of the 2D histogram + bins for which there is monte-carlo coverage. + """ + return self._hist_mask_mc_covered + + @property + def hist_mask_mc_covered_zero_physics(self): + """(read-only) The boolean ndarray holding the mask of the 2D histogram + bins for which there is monte-carlo coverage but zero physics + contribution. + """ + return self._hist_mask_mc_covered_zero_physics + + @property + def hist_mask_mc_covered_with_physics(self): + """(read-only) The boolean ndarray holding the mask of the 2D histogram + bins for which there is monte-carlo coverage and has physics + contribution. + """ + return self._hist_mask_mc_covered & ~self._hist_mask_mc_covered_zero_physics + + def get_prob(self, tdm, fitparams=None, tl=None): + """Calculates the energy probability (in logE) of each event. + + Parameters + ---------- + tdm : instance of TrialDataManager + The TrialDataManager instance holding the data events for which the + probability should be calculated for. The following data fields must + exist: + + - 'log_energy' : float + The logarithm of the energy value of the event. 
+ - 'sin_dec' : float + The sin(declination) value of the event. + + fitparams : None + Unused interface parameter. + tl : TimeLord instance | None + The optional TimeLord instance that should be used to measure + timing information. + + Returns + ------- + prob : 1D (N_events,) shaped ndarray + The array with the energy probability for each event. + """ + get_data = tdm.get_data + + logE_binning = self.get_binning('log_energy') + sinDec_binning = self.get_binning('sin_dec') + + logE_idx = np.digitize( + get_data('log_energy'), logE_binning.binedges) - 1 + sinDec_idx = np.digitize( + get_data('sin_dec'), sinDec_binning.binedges) - 1 + + with TaskTimer(tl, 'Evaluating logE-sinDec histogram.'): + prob = self._hist_logE_sinDec[(logE_idx, sinDec_idx)] + + return prob + + +class PDDataBackgroundI3EnergyPDF(PDEnergyPDF, IsBackgroundPDF): + """This is the IceCube energy background PDF, which gets constructed from + experimental data. This class is derived from I3EnergyPDF. + """ + + def __init__(self, data_exp, logE_binning, sinDec_binning, + smoothing_filter=None, kde_smoothing=False): + """Constructs a new IceCube energy background PDF from experimental + data. + + Parameters + ---------- + data_exp : instance of DataFieldRecordArray + The array holding the experimental data. The following data fields + must exist: + + - 'log_energy' : float + The logarithm of the reconstructed energy value of the data + event. + - 'sin_dec' : float + The sine of the reconstructed declination of the data event. + + logE_binning : BinningDefinition + The binning definition for the binning in log10(E). + sinDec_binning : BinningDefinition + The binning definition for the sin(declination). + smoothing_filter : SmoothingFilter instance | None + The smoothing filter to use for smoothing the energy histogram. + If None, no smoothing will be applied. + """ + if(not isinstance(data_exp, DataFieldRecordArray)): + raise TypeError('The data_exp argument must be an instance of ' + 'DataFieldRecordArray!') + + data_logE = data_exp['log_energy'] + data_sinDec = data_exp['sin_dec'] + # For experimental data, the MC and physics weight are unity. + data_mcweight = np.ones((len(data_exp),)) + data_physicsweight = data_mcweight + + # Create the PDF using the base class. + super(PDDataBackgroundI3EnergyPDF, self).__init__( + data_logE, data_sinDec, data_mcweight, data_physicsweight, + logE_binning, sinDec_binning, smoothing_filter, kde_smoothing + ) + + +class PDMCBackgroundI3EnergyPDF(EnergyPDF, IsBackgroundPDF, UsesBinning): + """This class provides a background energy PDF constructed from the public + data and a monte-carlo background flux model. + """ + + def __init__( + self, pdf_log10emu_sindecmu, log10emu_binning, sindecmu_binning, + **kwargs): + """Constructs a new background energy PDF with the given PDF data and + binning. + + Parameters + ---------- + pdf_log10emu_sindecmu : 2D numpy ndarray + The (n_log10emu, n_sindecmu)-shaped 2D numpy ndarray holding the + PDF values in unit 1/log10(E_mu/GeV). + A copy of this data will be created and held within this class + instance. + log10emu_binning : BinningDefinition + The binning definition for the binning in log10(E_mu/GeV). + sindecmu_binning : BinningDefinition + The binning definition for the binning in sin(dec_mu). 
+ """ + if not isinstance(pdf_log10emu_sindecmu, np.ndarray): + raise TypeError( + 'The pdf_log10emu_sindecmu argument must be an instance of ' + 'numpy.ndarray!') + if not isinstance(sindecmu_binning, BinningDefinition): + raise TypeError( + 'The sindecmu_binning argument must be an instance of ' + 'BinningDefinition!') + if not isinstance(log10emu_binning, BinningDefinition): + raise TypeError( + 'The log10emu_binning argument must be an instance of ' + 'BinningDefinition!') + + super().__init__(**kwargs) + + self.add_axis(PDFAxis( + log10emu_binning.name, + log10emu_binning.lower_edge, + log10emu_binning.upper_edge, + )) + + self.add_axis(PDFAxis( + sindecmu_binning.name, + sindecmu_binning.lower_edge, + sindecmu_binning.upper_edge, + )) + + self._hist_logE_sinDec = np.copy(pdf_log10emu_sindecmu) + self.add_binning(log10emu_binning, name='log_energy') + self.add_binning(sindecmu_binning, name='sin_dec') + + def assert_is_valid_for_trial_data(self, tdm): + """Checks if this PDF covers the entire value range of the trail + data events. + + Parameters + ---------- + tdm : TrialDataManager instance + The TrialDataManager instance holding the data events. + The following data fields need to exist: + + 'sin_dec' + + 'log_energy' + + Raises + ------ + ValueError + If parts of the trial data is outside the value range of this + PDF. + """ + sindecmu = tdm.get_data('sin_dec') + if np.min(sindecmu) < self.get_axis(0).vmin: + raise ValueError( + 'The minimum sindecmu value %e of the trial data is lower ' + 'than the minimum value of the PDF %e!' % ( + np.min(sindecmu), self.get_axis(0).vmin)) + if np.max(sindecmu) > self.get_axis(0).vmax: + raise ValueError( + 'The maximum sindecmu value %e of the trial data is larger ' + 'than the maximum value of the PDF %e!' % ( + np.max(sindecmu), self.get_axis(0).vmax)) + + log10emu = tdm.get_data('log_energy') + if np.min(log10emu) < self.get_axis(1).vmin: + raise ValueError( + 'The minimum log10emu value %e of the trial data is lower ' + 'than the minimum value of the PDF %e!' % ( + np.min(log10emu), self.get_axis(1).vmin)) + if np.max(log10emu) > self.get_axis(1).vmax: + raise ValueError( + 'The maximum log10emu value %e of the trial data is larger ' + 'than the maximum value of the PDF %e!' % ( + np.max(log10emu), self.get_axis(1).vmax)) + + def get_prob(self, tdm, params=None, tl=None): + """Gets the probability density for the given trial data events. + + Parameters + ---------- + tdm : TrialDataManager instance + The TrialDataManager instance holding the data events. + The following data fields need to exist: + + 'sin_dec' + + 'log_energy' + + params : dict | None + The dictionary containing the parameter names and values for which + the probability should get calculated. + By definition of this PDF, this is ``Ǹone``, because this PDF does + not depend on any parameters. + tl : TimeLord instance | None + The optional TimeLord instance that should be used to measure + timing information. + + Returns + ------- + prob : (N_events,)-shaped numpy ndarray + The 1D numpy ndarray with the probability density for each event. 
+ """ + get_data = tdm.get_data + + log10emu = get_data('log_energy') + sindecmu = get_data('sin_dec') + + log10emu_idxs = np.digitize( + log10emu, self.get_binning('log_energy').binedges) - 1 + sindecmu_idxs = np.digitize( + sindecmu, self.get_binning('sin_dec').binedges) - 1 + + with TaskTimer(tl, 'Evaluating sindecmu-log10emu PDF.'): + pd = self._hist_logE_sinDec[(log10emu_idxs, sindecmu_idxs)] + + return pd diff --git a/skyllh/analyses/i3/publicdata_ps/bkg_flux.py b/skyllh/analyses/i3/publicdata_ps/bkg_flux.py new file mode 100644 index 0000000000..731489f367 --- /dev/null +++ b/skyllh/analyses/i3/publicdata_ps/bkg_flux.py @@ -0,0 +1,381 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pickle + +from skyllh.physics.flux import PowerLawFlux +from skyllh.core.binning import get_bincenters_from_binedges + + +def get_dOmega(dec_min, dec_max): + """Calculates the solid angle given two declination angles. + + Parameters + ---------- + dec_min : float | array of float + The smaller declination angle. + dec_max : float | array of float + The larger declination angle. + + Returns + ------- + solidangle : float | array of float + The solid angle corresponding to the two given declination angles. + """ + return 2*np.pi*(np.sin(dec_max) - np.sin(dec_min)) + + +def southpole_zen2dec(zen): + """Converts zenith angles at the South Pole to declination angles. + + Parameters + ---------- + zen : (n,)-shaped 1d numpy ndarray + The numpy ndarray holding the zenith angle values in radians. + + Returns + ------- + dec : (n,)-shaped 1d numpy ndarray + The numpy ndarray holding the declination angle values in radians. + """ + dec = zen - np.pi/2 + return dec + + +def get_flux_atmo_decnu_log10enu(flux_pathfilename, log10_enu_max=9): + """Constructs the atmospheric flux map function + f_atmo(log10(E_nu/GeV),dec_nu) in unit 1/(GeV cm^2 sr s). + + Parameters + ---------- + flux_pathfilename : str + The pathfilename of the file containing the MCEq fluxes. + log10_enu_max : float + The log10(E/GeV) value of the maximum neutrino energy to be considered. + + Returns + ------- + flux_atmo : (n_dec, n_e_grid)-shaped 2D numpy ndarray + The numpy ndarray holding the the atmospheric neutrino flux function in + unit 1/(GeV cm^2 sr s). + decnu_binedges : (n_decnu+1,)-shaped 1D numpy ndarray + The numpy ndarray holding the dec_nu bin edges. + log10_enu_binedges : (n_enu+1,)-shaped 1D numpy ndarray + The numpy ndarray holding the neutrino energy bin edges in log10. + """ + with open(flux_pathfilename, 'rb') as f: + ((e_grid, zenith_angle_binedges), flux_def) = pickle.load(f) + zenith_angle_binedges = np.deg2rad(zenith_angle_binedges) + + # Select energy bins below 10**log10_true_e_max GeV. + m_e_grid = e_grid <= 10**log10_enu_max + e_grid = e_grid[m_e_grid] + + decnu_binedges = southpole_zen2dec(zenith_angle_binedges) + decnu_angles = get_bincenters_from_binedges(decnu_binedges) + + # Calculate the neutrino energy bin edges in log10. + log10_enu_binedges = np.empty((len(e_grid)+1),) + d_log10_enu = np.diff(np.log10(e_grid))[0] + log10_enu_binedges[:-1] = np.log10(e_grid) - d_log10_enu/2 + log10_enu_binedges[-1] = log10_enu_binedges[-2] + d_log10_enu + + n_decnu = len(decnu_angles) + n_enu = len(e_grid) + + # Calculate f_atmo(E_nu,dec_nu). 
+    f_atmo = np.zeros((n_decnu, n_enu))
+    zero_zen_idx = np.digitize(0, zenith_angle_binedges) - 1
+    for (decnu_idx, decnu) in enumerate(decnu_angles):
+        if decnu < 0:
+            fl = flux_def['numu_total'][:,decnu_idx][m_e_grid]
+        else:
+            # For up-going directions we use the flux calculated for the
+            # straight down-going direction.
+            fl = flux_def['numu_total'][:,zero_zen_idx][m_e_grid]
+        f_atmo[decnu_idx] = fl
+
+    return (f_atmo, decnu_binedges, log10_enu_binedges)
+
+
+def get_flux_astro_decnu_log10enu(decnu_binedges, log10_enu_binedges):
+    """Constructs the astrophysical neutrino flux function
+    f_astro(log10(E_nu/GeV),dec_nu) in unit 1/(GeV cm^2 sr s).
+
+    It uses the best fit from the IceCube publication [1].
+
+    Parameters
+    ----------
+    decnu_binedges : (n_decnu+1,)-shaped 1D numpy ndarray
+        The numpy ndarray holding the dec_nu bin edges.
+    log10_enu_binedges : (n_enu+1,)-shaped 1D numpy ndarray
+        The numpy ndarray holding the log10 values of the neutrino energy
+        bin edges in GeV.
+
+    Returns
+    -------
+    f_astro : (n_decnu, n_log10enu)-shaped 2D numpy ndarray
+        The numpy ndarray holding the astrophysical flux values in unit
+        1/(GeV cm^2 sr s).
+
+    References
+    ----------
+    [1] https://arxiv.org/pdf/2111.10299.pdf
+    """
+    fluxmodel = PowerLawFlux(Phi0=1.44e-18, E0=100e3, gamma=2.37)
+
+    n_decnu = len(decnu_binedges) - 1
+
+    enu_binedges = np.power(10, log10_enu_binedges)
+    enu_bincenters = get_bincenters_from_binedges(enu_binedges)
+
+    fl = fluxmodel(enu_bincenters)
+    f_astro = np.tile(fl, (n_decnu, 1))
+
+    return f_astro
+
+
+def convert_flux_bkg_to_pdf_bkg(f_bkg, decnu_binedges, log10_enu_binedges):
+    """Converts the given background flux function f_bkg into a background
+    flux PDF in unit 1/(log10(E/GeV) rad).
+
+    Parameters
+    ----------
+    f_bkg : (n_decnu, n_enu)-shaped 2D numpy ndarray
+        The numpy ndarray holding the background flux values in unit
+        1/(GeV cm^2 s sr).
+    decnu_binedges : (n_decnu+1,)-shaped 1D numpy ndarray
+        The numpy ndarray holding the dec_nu bin edges in radians.
+    log10_enu_binedges : (n_enu+1,)-shaped 1D numpy ndarray
+        The numpy ndarray holding the log10 values of the neutrino energy
+        bin edges in GeV.
+
+    Returns
+    -------
+    p_bkg : (n_decnu, n_enu)-shaped 2D numpy ndarray
+        The numpy ndarray holding the background flux PDF values.
+    """
+    d_decnu = np.diff(decnu_binedges)
+    d_log10_enu = np.diff(log10_enu_binedges)
+
+    bin_area = d_decnu[:,np.newaxis] * d_log10_enu[np.newaxis,:]
+    p_bkg = f_bkg / np.sum(f_bkg*bin_area)
+
+    # Cross-check the normalization of the PDF.
+    if not np.isclose(np.sum(p_bkg*bin_area), 1):
+        raise ValueError(
+            'The background PDF is not normalized! The integral is %f!'%(
+                np.sum(p_bkg*bin_area)))
+
+    return p_bkg
+
+
+def get_pd_atmo_decnu_Enu(flux_pathfilename, log10_true_e_max=9):
+    """Constructs the atmospheric neutrino PDF p_atmo(E_nu,dec_nu) in unit
+    1/(log10(E/GeV) rad).
+
+    Parameters
+    ----------
+    flux_pathfilename : str
+        The pathfilename of the file containing the MCEq flux.
+    log10_true_e_max : float
+        The log10(E/GeV) value of the maximum true energy to be considered.
+
+    Returns
+    -------
+    pd_atmo : (n_decnu, n_e_grid)-shaped 2D numpy ndarray
+        The numpy ndarray holding the atmospheric neutrino PDF in unit
+        1/(log10(E/GeV) rad).
+    decnu_binedges : (n_decnu+1,)-shaped 1D numpy ndarray
+        The numpy ndarray holding the dec_nu bin edges.
+    log10_e_grid_edges : (n_e_grid+1,)-shaped 1D numpy ndarray
+        The numpy ndarray holding the energy bin edges in log10.
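+
+    Examples
+    --------
+    Sketch of the intended usage; the flux file is the one produced by the
+    ``scripts/mceq_atm_bkg.py`` script (the path is illustrative)::
+
+        (pd_atmo, decnu_binedges, log10_e_grid_edges) = (
+            get_pd_atmo_decnu_Enu('mceq_flux_IC86_II.pkl'))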
+ """ + with open(flux_pathfilename, 'rb') as f: + ((e_grid, zenith_angle_binedges), flux_def) = pickle.load(f) + + # Select energy bins below 10**log10_true_e_max GeV. + m_e_grid = e_grid <= 10**log10_true_e_max + e_grid = e_grid[m_e_grid] + + zenith_angles = 0.5*(zenith_angle_binedges[:-1] + zenith_angle_binedges[1:]) + decnu_angles = np.deg2rad(zenith_angles) - np.pi/2 + + decnu_binedges = np.deg2rad(zenith_angle_binedges) - np.pi/2 + d_decnu = np.diff(decnu_binedges) + + # Calculate the e_grid bin edges in log10. + log10_e_grid_edges = np.empty((len(e_grid)+1),) + d_log10_e_grid = np.diff(np.log10(e_grid))[0] + log10_e_grid_edges[:-1] = np.log10(e_grid) - d_log10_e_grid/2 + log10_e_grid_edges[-1] = log10_e_grid_edges[-2] + d_log10_e_grid + + n_decnu = len(decnu_angles) + n_e_grid = len(e_grid) + + # Calculate p_atmo(E_nu,dec_nu). + pd_atmo = np.zeros((n_decnu, n_e_grid)) + for (decnu_idx, decnu) in enumerate(decnu_angles): + if decnu < 0: + fl = flux_def['numu_total'][:,decnu_idx][m_e_grid] + else: + # For up-going we use the flux calculation from the streight + # downgoing. + fl = flux_def['numu_total'][:,0][m_e_grid] + pd_atmo[decnu_idx] = fl + # Normalize the PDF. + bin_area = d_decnu[:,np.newaxis] * np.diff(log10_e_grid_edges)[np.newaxis,:] + pd_atmo /= np.sum(pd_atmo*bin_area) + + # Cross-check the normalization of the PDF. + if not np.isclose(np.sum(pd_atmo*bin_area), 1): + raise ValueError( + 'The atmospheric true energy PDF is not normalized! The integral is %f!'%(np.sum(pd_atmo*bin_area))) + + return (pd_atmo, decnu_binedges, log10_e_grid_edges) + + +def get_pd_atmo_E_nu_sin_dec_nu(flux_pathfilename): + """Constructs the atmospheric energy PDF p_atmo(E_nu|sin(dec_nu)) in + unit 1/GeV. + + Parameters + ---------- + flux_pathfilename : str + The pathfilename of the file containing the MCEq flux. + + Returns + ------- + pd_atmo : (n_sin_dec, n_e_grid)-shaped 2D numpy ndarray + The numpy ndarray holding the the atmospheric energy PDF in unit 1/GeV. + sin_dec_binedges : numpy ndarray + The (n_sin_dec+1,)-shaped 1D numpy ndarray holding the sin(dec) bin + edges. + log10_e_grid_edges : numpy ndarray + The (n_e_grid+1,)-shaped 1D numpy ndarray holding the energy bin edges + in log10. + """ + with open(flux_pathfilename, 'rb') as f: + ((e_grid, zenith_angle_binedges), flux_def) = pickle.load(f) + + # Select energy bins below 10**9 GeV. + m_e_grid = e_grid <= 10**9 + e_grid = e_grid[m_e_grid] + + zenith_angles = 0.5*(zenith_angle_binedges[:-1] + zenith_angle_binedges[1:]) + + # Calculate the e_grid bin edges in log10. + log10_e_grid_edges = np.empty((len(e_grid)+1),) + d_log10_e_grid = np.diff(np.log10(e_grid))[0] + log10_e_grid_edges[:-1] = np.log10(e_grid) - d_log10_e_grid/2 + log10_e_grid_edges[-1] = log10_e_grid_edges[-2] + d_log10_e_grid + + # Calculate the energy bin widths of the energy grid. + dE = np.diff(10**log10_e_grid_edges) + + # Convert zenith angles into sin(declination) angles. + sin_dec_binedges = np.sin(np.deg2rad(zenith_angle_binedges) - np.pi/2) + sin_dec_angles = np.sin(np.deg2rad(zenith_angles) - np.pi/2) + + n_e_grid = len(e_grid) + n_sin_dec = len(sin_dec_angles) + + # Calculate p_atmo(E_nu|sin(dec_nu)). + pd_atmo = np.zeros((n_sin_dec, n_e_grid)) + for (sin_dec_idx, sin_dec) in enumerate(sin_dec_angles): + if sin_dec < 0: + fl = flux_def['numu_total'][:,sin_dec_idx][m_e_grid] + else: + # For up-going we use the flux calculation from the streight + # downgoing. 
+ fl = flux_def['numu_total'][:,0][m_e_grid] + pd_atmo[sin_dec_idx] = fl/np.sum(fl*dE) + + # Cross-check the normalization of the PDF. + if not np.all(np.isclose(np.sum(pd_atmo*dE[np.newaxis,:], axis=1), 1)): + raise ValueError( + 'The atmospheric true energy PDF is not normalized!') + + return (pd_atmo, sin_dec_binedges, log10_e_grid_edges) + + +def get_pd_astro_E_nu_sin_dec_nu(sin_dec_binedges, log10_e_grid_edges): + """Constructs the astrophysical energy PDF p_astro(E_nu|sin(dec_nu)) in + unit 1/GeV. + It uses the best fit from the IceCube publication [1]. + + Parameters + ---------- + sin_dec_binedges : (n_sin_dec+1,)-shaped 1D numpy ndarray + The numpy ndarray holding the sin(dec) bin edges. + log10_e_grid_edges : (n_e_grid+1,)-shaped 1D numpy ndarray + The numpy ndarray holding the log10 values of the energy bin edges in + GeV of the energy grid. + + Returns + ------- + pd_astro : (n_sin_dec, n_e_grid)-shaped 2D numpy ndarray + The numpy ndarray holding the energy probability density values + p(E_nu|sin_dec_nu) in unit 1/GeV. + + References + ---------- + [1] https://arxiv.org/pdf/2111.10299.pdf + """ + fluxmodel = PowerLawFlux(Phi0=1.44e-18, E0=100e3, gamma=2.37) + + n_sin_dec = len(sin_dec_binedges) - 1 + n_e_grid = len(log10_e_grid_edges) - 1 + + e_grid_edges = 10**log10_e_grid_edges + e_grid_bc = 0.5*(e_grid_edges[:-1] + e_grid_edges[1:]) + + dE = np.diff(e_grid_edges) + + fl = fluxmodel(e_grid_bc) + pd = fl / np.sum(fl*dE) + pd_astro = np.tile(pd, (n_sin_dec, 1)) + + # Cross-check the normalization of the PDF. + if not np.all(np.isclose(np.sum(pd_astro*dE[np.newaxis,:], axis=1), 1)): + raise ValueError( + 'The astrophysical energy PDF is not normalized!') + + return pd_astro + + +def get_pd_bkg_E_nu_sin_dec_nu(pd_atmo, pd_astro, log10_e_grid_edges): + """Constructs the total background flux probability density + p_bkg(E_nu|sin(dec_nu)) in unit 1/GeV. + + Parameters + ---------- + pd_atmo : (n_sin_dec, n_e_grid)-shaped 2D numpy ndarray + The numpy ndarray holding the probability density values + p(E_nu|sin(dec_nu)) in 1/GeV of the atmospheric flux. + pd_astro : (n_sin_dec, n_e_grid)-shaped 2D numpy ndarray + The numpy ndarray holding the probability density values + p(E_nu|sin(dec_nu)) in 1/GeV of the astrophysical flux. + log10_e_grid_edges : (n_e_grid+1,)-shaped numpy ndarray + The numpy ndarray holding the log10 values of the energy grid bin edges + in GeV. + + Returns + ------- + pd_bkg : (n_sin_dec, n_e_grid)-shaped 2D numpy ndarray + The numpy ndarray holding total background probability density values + p_bkg(E_nu|sin(dec_nu)) in unit 1/GeV. 
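+
+    Examples
+    --------
+    Sketch of how the total background PDF is assembled from the
+    atmospheric and astrophysical components (the flux file path is
+    illustrative)::
+
+        (pd_atmo, sin_dec_be, log10_e_be) = get_pd_atmo_E_nu_sin_dec_nu(
+            'mceq_flux_IC86_II.pkl')
+        pd_astro = get_pd_astro_E_nu_sin_dec_nu(sin_dec_be, log10_e_be)
+        pd_bkg = get_pd_bkg_E_nu_sin_dec_nu(pd_atmo, pd_astro, log10_e_be)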
+ """ + pd_bkg = pd_atmo + pd_astro + + dE = np.diff(10**log10_e_grid_edges) + + s = np.sum(pd_bkg*dE[np.newaxis,:], axis=1, keepdims=True) + pd_bkg /= s + + if not np.all(np.isclose(np.sum(pd_bkg*dE[np.newaxis,:], axis=1), 1)): + raise ValueError( + 'The background energy PDF is not normalized!') + + return pd_bkg + + diff --git a/skyllh/analyses/i3/publicdata_ps/detsigyield.py b/skyllh/analyses/i3/publicdata_ps/detsigyield.py new file mode 100644 index 0000000000..ad138b75c2 --- /dev/null +++ b/skyllh/analyses/i3/publicdata_ps/detsigyield.py @@ -0,0 +1,214 @@ +# -*- coding: utf-8 -*- + +import numpy as np + +import scipy.interpolate + +from skyllh.core import multiproc +from skyllh.core.binning import BinningDefinition +from skyllh.core.dataset import ( + Dataset, + DatasetData +) +from skyllh.core.livetime import Livetime +from skyllh.core.parameters import ParameterGrid +from skyllh.core.detsigyield import ( + get_integrated_livetime_in_days +) +from skyllh.physics.flux import ( + PowerLawFlux, + get_conversion_factor_to_internal_flux_unit +) +from skyllh.i3.detsigyield import ( + PowerLawFluxPointLikeSourceI3DetSigYieldImplMethod, + PowerLawFluxPointLikeSourceI3DetSigYield +) +from skyllh.analyses.i3.publicdata_ps.aeff import ( + load_effective_area_array +) + + +class PublicDataPowerLawFluxPointLikeSourceI3DetSigYieldImplMethod( + PowerLawFluxPointLikeSourceI3DetSigYieldImplMethod, + multiproc.IsParallelizable): + """This detector signal yield constructor class constructs a + detector signal yield instance for a variable power law flux model, which + has the spectral index gamma as fit parameter, assuming a point-like source. + It constructs a two-dimensional spline function in sin(dec) and gamma, using + a :class:`scipy.interpolate.RectBivariateSpline`. Hence, the detector signal + yield can vary with the declination and the spectral index, gamma, of the + source. + + This detector signal yield implementation method works with a + PowerLawFlux flux model. + + It is tailored to the IceCube detector at the South Pole, where the + effective area depends solely on the zenith angle, and hence on the + declination, of the source. + + It takes the effective area for the detector signal yield from the auxilary + detector effective area data file given by the public data. + """ + + def __init__( + self, gamma_grid, spline_order_sinDec=2, spline_order_gamma=2, + ncpu=None): + """Creates a new IceCube detector signal yield constructor instance for + a power law flux model. It requires the effective area from the public + data, and a gamma parameter grid to compute the gamma dependency of the + detector signal yield. + + Parameters + ---------- + gamma_grid : ParameterGrid instance + The ParameterGrid instance which defines the grid of gamma values. + spline_order_sinDec : int + The order of the spline function for the logarithmic values of the + detector signal yield along the sin(dec) axis. + The default is 2. + spline_order_gamma : int + The order of the spline function for the logarithmic values of the + detector signal yield along the gamma axis. + The default is 2. + ncpu : int | None + The number of CPUs to utilize. Global setting will take place if + not specified, i.e. set to None. 
+ """ + super().__init__( + gamma_grid=gamma_grid, + sin_dec_binning=None, + spline_order_sinDec=spline_order_sinDec, + spline_order_gamma=spline_order_gamma, + ncpu=ncpu) + + def construct_detsigyield( + self, dataset, data, fluxmodel, livetime, ppbar=None): + """Constructs a detector signal yield 2-dimensional log spline + function for the given power law flux model with varying gamma values. + + Parameters + ---------- + dataset : Dataset instance + The Dataset instance holding the sin(dec) binning definition. + data : DatasetData instance + The DatasetData instance holding the monte-carlo event data. + This implementation loads the effective area from the provided + public data and hence does not need monte-carlo data. + fluxmodel : FluxModel + The flux model instance. Must be an instance of PowerLawFlux. + livetime : float | Livetime instance + The live-time in days or an instance of Livetime to use for the + detector signal yield. + ppbar : ProgressBar instance | None + The instance of ProgressBar of the optional parent progress bar. + + Returns + ------- + detsigyield : PowerLawFluxPointLikeSourceI3DetSigYield instance + The DetSigYield instance for a point-like source with a power law + flux with variable gamma parameter. + """ + # Check for the correct data types of the input arguments. + if(not isinstance(dataset, Dataset)): + raise TypeError('The dataset argument must be an instance of ' + 'Dataset!') + if(not isinstance(data, DatasetData)): + raise TypeError('The data argument must be an instance of ' + 'DatasetData!') + if(not self.supports_fluxmodel(fluxmodel)): + raise TypeError('The DetSigYieldImplMethod "%s" does not support ' + 'the flux model "%s"!' % ( + self.__class__.__name__, + fluxmodel.__class__.__name__)) + if((not isinstance(livetime, float)) and + (not isinstance(livetime, Livetime))): + raise TypeError('The livetime argument must be an instance of ' + 'float or Livetime!') + + # Get integrated live-time in days. + livetime_days = get_integrated_livetime_in_days(livetime) + + # Calculate conversion factor from the flux model unit into the internal + # flux unit GeV^-1 cm^-2 s^-1. + toGeVcm2s = get_conversion_factor_to_internal_flux_unit(fluxmodel) + + # Load the effective area data from the public dataset. + aeff_fnames = dataset.get_abs_pathfilename_list( + dataset.get_aux_data_definition('eff_area_datafile')) + ( + aeff_arr, + sin_true_dec_binedges_lower, + sin_true_dec_binedges_upper, + log_true_e_binedges_lower, + log_true_e_binedges_upper + ) = load_effective_area_array(aeff_fnames) + + # Calculate the detector signal yield in sin_dec vs gamma. + def hist( + energy_bin_edges_lower, energy_bin_edges_upper, + aeff, fluxmodel): + """Creates a histogram of the detector signal yield for the given + sin(dec) binning. + + Parameters + ---------- + energy_bin_edges_lower : 1d ndarray + The array holding the lower bin edges in E_nu/GeV. + energy_bin_edges_upper : 1d ndarray + The array holding the upper bin edges in E_nu/GeV. + aeff : (n_bins_sin_dec, n_bins_log_energy)-shaped 2d ndarray + The effective area binned data array. + + Returns + ------- + h : (n_bins_sin_dec,)-shaped 1d ndarray + The numpy array containing the detector signal yield values for + the different sin_dec bins and the given flux model. + """ + # Create histogram for the number of neutrinos with each energy + # bin. + h_phi = fluxmodel.get_integral( + energy_bin_edges_lower, energy_bin_edges_upper) + + # Sum over the enegry bins for each sin_dec row. 
+            h = np.sum(aeff*h_phi, axis=1)
+
+            return h
+
+        energy_bin_edges_lower = np.power(10, log_true_e_binedges_lower)
+        energy_bin_edges_upper = np.power(10, log_true_e_binedges_upper)
+
+        # Make a copy of the gamma grid and extend the grid by one bin on
+        # each side.
+        gamma_grid = self._gamma_grid.copy()
+        gamma_grid.add_extra_lower_and_upper_bin()
+
+        # Construct the arguments for the hist function to be used in the
+        # multiproc.parallelize function.
+        args_list = [
+            ((energy_bin_edges_lower,
+              energy_bin_edges_upper,
+              aeff_arr,
+              fluxmodel.copy({'gamma': gamma})), {})
+            for gamma in gamma_grid.grid
+        ]
+        h = np.vstack(
+            multiproc.parallelize(
+                hist, args_list, self.ncpu, ppbar=ppbar)).T
+        h *= toGeVcm2s * livetime_days * 86400.
+
+        # Create a 2d spline in log of the detector signal yield.
+        sin_dec_bincenters = 0.5*(
+            sin_true_dec_binedges_lower + sin_true_dec_binedges_upper)
+        log_spl_sinDec_gamma = scipy.interpolate.RectBivariateSpline(
+            sin_dec_bincenters, gamma_grid.grid, np.log(h),
+            kx=self.spline_order_sinDec, ky=self.spline_order_gamma, s=0)
+
+        # Construct the detector signal yield instance with the created
+        # spline.
+        sin_dec_binedges = np.concatenate(
+            (sin_true_dec_binedges_lower, [sin_true_dec_binedges_upper[-1]]))
+        sin_dec_binning = BinningDefinition('sin_dec', sin_dec_binedges)
+        detsigyield = PowerLawFluxPointLikeSourceI3DetSigYield(
+            self, dataset, fluxmodel, livetime, sin_dec_binning,
+            log_spl_sinDec_gamma)
+
+        return detsigyield
diff --git a/skyllh/analyses/i3/publicdata_ps/mcbkg_ps.py b/skyllh/analyses/i3/publicdata_ps/mcbkg_ps.py
new file mode 100644
index 0000000000..5f98db4220
--- /dev/null
+++ b/skyllh/analyses/i3/publicdata_ps/mcbkg_ps.py
@@ -0,0 +1,472 @@
+# -*- coding: utf-8 -*-
+
+"""The mcbkg_ps analysis is a multi-dataset time-integrated single source
+analysis with a two-component likelihood function using a spatial and an
+energy event PDF. It initializes the background energy PDF using auxiliary
+fluxes and PDFs, which are generated by running the
+`scripts/mceq_atm_bkg.py` script.
+"""
+
+import argparse
+import logging
+import numpy as np
+import pickle
+
+from skyllh.core.progressbar import ProgressBar
+
+# Classes to define the source hypothesis.
+from skyllh.physics.source import PointLikeSource
+from skyllh.physics.flux import PowerLawFlux
+from skyllh.core.source_hypo_group import SourceHypoGroup
+from skyllh.core.source_hypothesis import SourceHypoGroupManager
+
+# Classes to define the fit parameters.
+from skyllh.core.parameters import (
+    SingleSourceFitParameterMapper,
+    FitParameter
+)
+
+# Classes for the minimizer.
+from skyllh.core.minimizer import Minimizer, LBFGSMinimizerImpl
+
+# Classes for utility functionality.
+from skyllh.core.config import CFG
+from skyllh.core.random import RandomStateService
+from skyllh.core.optimize import SpatialBoxEventSelectionMethod
+from skyllh.core.smoothing import BlockSmoothingFilter
+from skyllh.core.timing import TimeLord
+from skyllh.core.trialdata import TrialDataManager
+
+# Classes for defining the analysis.
+from skyllh.core.test_statistic import TestStatisticWilks
+from skyllh.core.analysis import (
+    TimeIntegratedMultiDatasetSingleSourceAnalysis as Analysis
+)
+
+# Classes to define the background generation.
+from skyllh.core.scrambling import DataScrambler, UniformRAScramblingMethod
+from skyllh.i3.background_generation import FixedScrambledExpDataI3BkgGenMethod
+
+# Classes to define the signal and background PDFs.
+from skyllh.core.signalpdf import RayleighPSFPointSourceSignalSpatialPDF +from skyllh.i3.signalpdf import SignalI3EnergyPDFSet +from skyllh.i3.backgroundpdf import ( + DataBackgroundI3SpatialPDF, + DataBackgroundI3EnergyPDF +) +from skyllh.i3.pdfratio import ( + I3EnergySigSetOverBkgPDFRatioSpline +) +# Classes to define the spatial and energy PDF ratios. +from skyllh.core.pdfratio import ( + SpatialSigOverBkgPDFRatio, + Skylab2SkylabPDFRatioFillMethod +) + +from skyllh.i3.signal_generation import PointLikeSourceI3SignalGenerationMethod + +# Analysis utilities. +from skyllh.core.analysis_utils import ( + pointlikesource_to_data_field_array +) + +# Logging setup utilities. +from skyllh.core.debugging import ( + setup_logger, + setup_console_handler, + setup_file_handler +) + +# Pre-defined public IceCube data samples. +from skyllh.datasets.i3 import data_samples + +# Analysis specific classes for working with the public data. +from skyllh.analyses.i3.publicdata_ps.signal_generator import ( + PDSignalGenerator +) +from skyllh.analyses.i3.publicdata_ps.detsigyield import ( + PublicDataPowerLawFluxPointLikeSourceI3DetSigYieldImplMethod +) +from skyllh.analyses.i3.publicdata_ps.signalpdf import ( + PDSignalEnergyPDFSet +) +from skyllh.analyses.i3.publicdata_ps.backgroundpdf import ( + PDMCBackgroundI3EnergyPDF +) +from skyllh.analyses.i3.publicdata_ps.pdfratio import ( + PDPDFRatio +) + + +def psi_func(tdm, src_hypo_group_manager, fitparams): + """Function to calculate the opening angle between the source position + and the event's reconstructed position. + """ + ra = tdm.get_data('ra') + dec = tdm.get_data('dec') + + # Make the source position angles two-dimensional so the PDF value + # can be calculated via numpy broadcasting automatically for several + # sources. This is useful for stacking analyses. + src_ra = tdm.get_data('src_array')['ra'][:, np.newaxis] + src_dec = tdm.get_data('src_array')['dec'][:, np.newaxis] + + delta_dec = np.abs(dec - src_dec) + delta_ra = np.abs(ra - src_ra) + x = ( + (np.sin(delta_dec / 2.))**2. + np.cos(dec) * + np.cos(src_dec) * (np.sin(delta_ra / 2.))**2. + ) + + # Handle possible floating precision errors. + x[x < 0.] = 0. + x[x > 1.] = 1. + + psi = (2.0*np.arcsin(np.sqrt(x))) + + # For now we support only a single source, hence return psi[0]. + return psi[0, :] + + +def TXS_location(): + src_ra = np.radians(77.358) + src_dec = np.radians(5.693) + return (src_ra, src_dec) + + +def create_analysis( + rss, + datasets, + source, + refplflux_Phi0=1, + refplflux_E0=1e3, + refplflux_gamma=2, + ns_seed=10.0, + gamma_seed=3, + cache_dir='.', + cap_ratio=False, + compress_data=False, + keep_data_fields=None, + optimize_delta_angle=10, + efficiency_mode=None, + tl=None, + ppbar=None +): + """Creates the Analysis instance for this particular analysis. + + Parameters: + ----------- + datasets : list of Dataset instances + The list of Dataset instances, which should be used in the + analysis. + source : PointLikeSource instance + The PointLikeSource instance defining the point source position. + refplflux_Phi0 : float + The flux normalization to use for the reference power law flux model. + refplflux_E0 : float + The reference energy to use for the reference power law flux model. + refplflux_gamma : float + The spectral index to use for the reference power law flux model. + ns_seed : float + Value to seed the minimizer with for the ns fit. + gamma_seed : float | None + Value to seed the minimizer with for the gamma fit. 
If set to None,
+        the refplflux_gamma value will be set as gamma_seed.
+    cache_dir : str
+        The cache directory where to look for cached data, e.g. signal PDFs.
+    cap_ratio : bool
+        Switch whether the energy PDF ratio should get capped where no
+        background information is available. Default is False.
+    compress_data : bool
+        Flag if the data should get converted from float64 into float32.
+    keep_data_fields : list of str | None
+        List of additional data field names that should get kept when
+        loading the data.
+    optimize_delta_angle : float
+        The delta angle in degrees for the event selection optimization
+        methods.
+    efficiency_mode : str | None
+        The efficiency mode the data should get loaded with. Possible values
+        are:
+
+        - 'memory':
+            The data will be loaded in a memory efficient way. This will
+            require more time, because all data records of a file will
+            be loaded sequentially.
+        - 'time':
+            The data will be loaded in a time efficient way. This will
+            require more memory, because each data file gets loaded in
+            memory at once.
+
+        The default value is ``'time'``. If set to ``None``, the default
+        value will be used.
+    tl : TimeLord instance | None
+        The TimeLord instance to use to time the creation of the analysis.
+    ppbar : ProgressBar instance | None
+        The instance of ProgressBar for the optional parent progress bar.
+
+    Returns
+    -------
+    analysis : TimeIntegratedMultiDatasetSingleSourceAnalysis
+        The Analysis instance for this analysis.
+    """
+    # Define the flux model.
+    flux_model = PowerLawFlux(
+        Phi0=refplflux_Phi0, E0=refplflux_E0, gamma=refplflux_gamma)
+
+    # Define the fit parameter ns.
+    fitparam_ns = FitParameter('ns', 0, 1e3, ns_seed)
+
+    # Define the gamma fit parameter.
+    fitparam_gamma = FitParameter(
+        'gamma', valmin=1, valmax=5, initial=gamma_seed)
+
+    # Define the detector signal efficiency implementation method for the
+    # IceCube detector and this source and flux_model.
+    # The sin(dec) binning will be taken by the implementation method
+    # automatically from the Dataset instance.
+    gamma_grid = fitparam_gamma.as_linear_grid(delta=0.1)
+    detsigyield_implmethod = \
+        PublicDataPowerLawFluxPointLikeSourceI3DetSigYieldImplMethod(
+            gamma_grid)
+
+    # Define the signal generation method.
+    #sig_gen_method = PointLikeSourceI3SignalGenerationMethod()
+    sig_gen_method = None
+
+    # Create a source hypothesis group manager.
+    src_hypo_group_manager = SourceHypoGroupManager(
+        SourceHypoGroup(
+            source, flux_model, detsigyield_implmethod, sig_gen_method))
+
+    # Create a source fit parameter mapper and define the fit parameters.
+    src_fitparam_mapper = SingleSourceFitParameterMapper()
+    src_fitparam_mapper.def_fit_parameter(fitparam_gamma)
+
+    # Define the test statistic.
+    test_statistic = TestStatisticWilks()
+
+    # Define the data scrambler with its data scrambling method, which is
+    # used for background generation.
+    data_scrambler = DataScrambler(UniformRAScramblingMethod())
+
+    # Create the background generation method.
+    bkg_gen_method = FixedScrambledExpDataI3BkgGenMethod(data_scrambler)
+
+    # Create the minimizer instance.
+    minimizer = Minimizer(LBFGSMinimizerImpl())
+
+    # Create the Analysis instance.
+    analysis = Analysis(
+        src_hypo_group_manager,
+        src_fitparam_mapper,
+        fitparam_ns,
+        test_statistic,
+        bkg_gen_method,
+        sig_generator_cls=PDSignalGenerator
+    )
+
+    # Define the event selection method for pure optimization purposes.
+    # We will use the same method for all datasets.
+    event_selection_method = SpatialBoxEventSelectionMethod(
+        src_hypo_group_manager, delta_angle=np.deg2rad(optimize_delta_angle))
+    #event_selection_method = None
+
+    # Add the data sets to the analysis.
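+    # For each dataset, a TrialDataManager, the spatial and energy PDF
+    # ratios, and the background energy PDF (loaded from the auxiliary
+    # pickle file) are set up in the loop below.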
+ pbar = ProgressBar(len(datasets), parent=ppbar).start() + for ds in datasets: + # Load the data of the data set. + data = ds.load_and_prepare_data( + keep_fields=keep_data_fields, + compress=compress_data, + efficiency_mode=efficiency_mode, + tl=tl) + + # Create a trial data manager and add the required data fields. + tdm = TrialDataManager() + tdm.add_source_data_field('src_array', + pointlikesource_to_data_field_array) + tdm.add_data_field('psi', psi_func) + + sin_dec_binning = ds.get_binning_definition('sin_dec') + log_energy_binning = ds.get_binning_definition('log_energy') + + # Create the spatial PDF ratio instance for this dataset. + spatial_sigpdf = RayleighPSFPointSourceSignalSpatialPDF( + dec_range=np.arcsin(sin_dec_binning.range)) + spatial_bkgpdf = DataBackgroundI3SpatialPDF( + data.exp, sin_dec_binning) + spatial_pdfratio = SpatialSigOverBkgPDFRatio( + spatial_sigpdf, spatial_bkgpdf) + + # Create the energy PDF ratio instance for this dataset. + energy_sigpdfset = PDSignalEnergyPDFSet( + ds=ds, + src_dec=source.dec, + flux_model=flux_model, + fitparam_grid_set=gamma_grid, + ppbar=ppbar + ) + + #smoothing_filter = BlockSmoothingFilter(nbins=1) + #energy_bkgpdf = DataBackgroundI3EnergyPDF( + # data.exp, log_energy_binning, sin_dec_binning, smoothing_filter) + + bkg_pdf_pathfilename = ds.get_abs_pathfilename_list( + ds.get_aux_data_definition('pdf_bkg_datafile'))[0] + with open(bkg_pdf_pathfilename, 'rb') as f: + bkg_pdf_data = pickle.load(f) + energy_bkgpdf = PDMCBackgroundI3EnergyPDF( + pdf_log10emu_sindecmu=bkg_pdf_data['pdf'], + sindecmu_binning=bkg_pdf_data['sindecmu_binning'], + log10emu_binning=bkg_pdf_data['log10emu_binning'] + ) + + energy_pdfratio = PDPDFRatio( + sig_pdf_set=energy_sigpdfset, + bkg_pdf=energy_bkgpdf, + cap_ratio=cap_ratio + ) + + pdfratios = [spatial_pdfratio, energy_pdfratio] + + analysis.add_dataset( + ds, data, pdfratios, tdm, event_selection_method) + + pbar.increment() + pbar.finish() + + analysis.llhratio = analysis.construct_llhratio(minimizer, ppbar=ppbar) + + # analysis.construct_signal_generator() + + return analysis + + +if(__name__ == '__main__'): + p = argparse.ArgumentParser( + description='Calculates TS for a given source location using the ' + '10-year public point source sample.', + formatter_class=argparse.RawTextHelpFormatter + ) + p.add_argument( + '--dec', + default=23.8, + type=float, + help='The source declination in degrees.' + ) + p.add_argument( + '--ra', + default=216.76, + type=float, + help='The source right-ascention in degrees.' + ) + p.add_argument( + '--gamma-seed', + default=3, + type=float, + help='The seed value of the gamma fit parameter.' + ) + p.add_argument( + '--data_base_path', + default=None, + type=str, + help='The base path to the data samples (default=None)' + ) + p.add_argument( + '--pdf-seed', + default=1, + type=int, + help='The random number generator seed for generating the ' + 'signal PDF.' + ) + p.add_argument( + '--seed', + default=1, + type=int, + help='The random number generator seed for the likelihood ' + 'minimization.' + ) + p.add_argument( + '--ncpu', + default=1, + type=int, + help='The number of CPUs to utilize where parallelization is possible.' + ) + p.add_argument( + '--cache-dir', + default='.', + type=str, + help='The cache directory to look for cached data, e.g. signal PDFs.') + p.add_argument( + '--cap-ratio', + action='store_true', + help='Switch to cap the energy PDF ratio.') + p.set_defaults(cap_ratio=False) + args = p.parse_args() + + # Setup `skyllh` package logging. 
+ # To optimize logging set the logging level to the lowest handling level. + setup_logger('skyllh', logging.DEBUG) + log_format = '%(asctime)s %(processName)s %(name)s %(levelname)s: '\ + '%(message)s' + setup_console_handler('skyllh', logging.INFO, log_format) + setup_file_handler('skyllh', 'debug.log', + log_level=logging.DEBUG, + log_format=log_format) + + CFG['multiproc']['ncpu'] = args.ncpu + + sample_seasons = [ + #('PublicData_10y_ps', 'IC40'), + #('PublicData_10y_ps', 'IC59'), + #('PublicData_10y_ps', 'IC79'), + #('PublicData_10y_ps', 'IC86_I'), + ('PublicData_10y_ps', 'IC86_II'), + #('PublicData_10y_ps', 'IC86_II-VII') + ] + + datasets = [] + for (sample, season) in sample_seasons: + # Get the dataset from the correct dataset collection. + dsc = data_samples[sample].create_dataset_collection( + args.data_base_path) + datasets.append(dsc.get_dataset(season)) + + # Define a random state service. + rss_pdf = RandomStateService(args.pdf_seed) + rss = RandomStateService(args.seed) + # Define the point source. + source = PointLikeSource(np.deg2rad(args.ra), np.deg2rad(args.dec)) + print('source: ', str(source)) + + tl = TimeLord() + + with tl.task_timer('Creating analysis.'): + ana = create_analysis( + rss_pdf, + datasets, + source, + cache_dir=args.cache_dir, + cap_ratio=args.cap_ratio, + gamma_seed=args.gamma_seed, + tl=tl) + + with tl.task_timer('Unblinding data.'): + (TS, fitparam_dict, status) = ana.unblind(rss) + + print('TS = %g' % (TS)) + print('ns_fit = %g' % (fitparam_dict['ns'])) + print('gamma_fit = %g' % (fitparam_dict['gamma'])) + + + # Generate some signal events. + #ana.construct_signal_generator() + #with tl.task_timer('Generating signal events.'): + # (n_sig, signal_events_dict) =\ + # ana.sig_generator.generate_signal_events(rss, 100) + + #trials = ana.do_trials( + # rss, 100, mean_n_sig=20 + #) + + #print('n_sig: %d'%n_sig) + #print('signal datasets: '+str(signal_events_dict.keys())) + + + print(tl) diff --git a/skyllh/analyses/i3/publicdata_ps/pdfratio.py b/skyllh/analyses/i3/publicdata_ps/pdfratio.py new file mode 100644 index 0000000000..961addf28f --- /dev/null +++ b/skyllh/analyses/i3/publicdata_ps/pdfratio.py @@ -0,0 +1,232 @@ +# -*- coding: utf-8 -*- +# Authors: +# Dr. Martin Wolf + +import numpy as np + +from skyllh.core.py import module_classname +from skyllh.core.debugging import get_logger +from skyllh.core.parameters import make_params_hash +from skyllh.core.pdf import PDF +from skyllh.core.pdfratio import SigSetOverBkgPDFRatio + + +class PDPDFRatio(SigSetOverBkgPDFRatio): + def __init__(self, sig_pdf_set, bkg_pdf, cap_ratio=False, **kwargs): + """Creates a PDFRatio instance for the public data. + It takes a signal PDF set for different discrete gamma values. + + Parameters + ---------- + sig_pdf_set : instance of PDSignalEnergyPDFSet + The PDSignalEnergyPDFSet instance holding the set of signal energy + PDFs. + bkg_pdf : instance of PDDataBackgroundI3EnergyPDF + The PDDataBackgroundI3EnergyPDF instance holding the background + energy PDF. + cap_ratio : bool + Switch whether the S/B PDF ratio should get capped where no + background is available. Default is False. + """ + self._logger = get_logger(module_classname(self)) + + super().__init__( + pdf_type=PDF, + signalpdfset=sig_pdf_set, + backgroundpdf=bkg_pdf, + **kwargs) + + # Construct the instance for the fit parameter interpolation method. 
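+        # The interpolation method evaluates the PDF ratio on the discrete
+        # gamma grid points and interpolates the values and gradients
+        # between them.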
+ self._interpolmethod_instance = self.interpolmethod( + self._get_ratio_values, sig_pdf_set.fitparams_grid_set) + + self.cap_ratio = cap_ratio + if self.cap_ratio: + self._logger.info('The energy PDF ratio will be capped!') + + # Calculate the ratio value for the phase space where no background + # is available. We will take the p_sig percentile of the signal + # like phase space. + ratio_perc = 99 + + # Get the log10 reco energy values where the background pdf has + # non-zero values. + n_logE = bkg_pdf.get_binning('log_energy').nbins + n_sinDec = bkg_pdf.get_binning('sin_dec').nbins + bd = bkg_pdf._hist_logE_sinDec > 0 + log10_e_bc = bkg_pdf.get_binning('log_energy').bincenters + self.ratio_fill_value_dict = dict() + for sig_pdf_key in sig_pdf_set.pdf_keys: + sigpdf = sig_pdf_set[sig_pdf_key] + sigvals = sigpdf.get_pd_by_log10_reco_e(log10_e_bc) + sigvals = np.broadcast_to(sigvals, (n_sinDec, n_logE)).T + r = sigvals[bd] / bkg_pdf._hist_logE_sinDec[bd] + # Remove possible inf values. + r = r[np.invert(np.isinf(r))] + val = np.percentile(r[r > 1.], ratio_perc) + self.ratio_fill_value_dict[sig_pdf_key] = val + self._logger.info( + f'The cap value for the energy PDF ratio key {sig_pdf_key} ' + f'is {val}.') + + # Create cache variables for the last ratio value and gradients in + # order to avoid the recalculation of the ratio value when the + # ``get_gradient`` method is called (usually after the ``get_ratio`` + # method was called). + self._cache_fitparams_hash = None + self._cache_ratio = None + self._cache_gradients = None + + @property + def cap_ratio(self): + """Boolean switch whether to cap the ratio where no background + information is available (True) or use the smallest possible floating + point number greater than zero as background pdf value (False). + """ + return self._cap_ratio + @cap_ratio.setter + def cap_ratio(self, b): + self._cap_ratio = b + + def _get_signal_fitparam_names(self): + """This method must be re-implemented by the derived class and needs to + return the list of signal fit parameter names, this PDF ratio is a + function of. If it returns an empty list, the PDF ratio is independent + of any signal fit parameters. + + Returns + ------- + list of str + The list of the signal fit parameter names, this PDF ratio is a + function of. By default this method returns an empty list indicating + that the PDF ratio depends on no signal parameter. + """ + fitparam_names = self.signalpdfset.fitparams_grid_set.parameter_names + return fitparam_names + + def _is_cached(self, tdm, fitparams_hash): + """Checks if the ratio and gradients for the given set of fit parameters + are already cached. + """ + if((self._cache_fitparams_hash == fitparams_hash) and + (len(self._cache_ratio) == tdm.n_selected_events) + ): + return True + return False + + def _get_ratio_values(self, tdm, gridfitparams, eventdata): + """Select the signal PDF for the given fit parameter grid point and + evaluates the S/B ratio for all the given events. 
+ """ + sig_pdf_key = self.signalpdfset.make_pdf_key(gridfitparams) + + sig_prob = self.signalpdfset.get_pdf(sig_pdf_key).get_prob(tdm) + if isinstance(sig_prob, tuple): + (sig_prob, _) = sig_prob + + bkg_prob = self.backgroundpdf.get_prob(tdm) + if isinstance(bkg_prob, tuple): + (bkg_prob, _) = bkg_prob + + if len(sig_prob) != len(bkg_prob): + raise ValueError( + f'The number of signal ({len(sig_prob)}) and background ' + f'({len(bkg_prob)}) probability values is not equal!') + + m_nonzero_bkg = bkg_prob > 0 + m_zero_bkg = np.invert(m_nonzero_bkg) + if np.any(m_zero_bkg): + ev_idxs = np.where(m_zero_bkg)[0] + self._logger.debug( + f'For {len(ev_idxs)} events the background probability is ' + f'zero. The event indices of these events are: {ev_idxs}') + + ratio = np.empty((len(sig_prob),), dtype=np.double) + ratio[m_nonzero_bkg] = sig_prob[m_nonzero_bkg] / bkg_prob[m_nonzero_bkg] + + if self._cap_ratio: + ratio[m_zero_bkg] = self.ratio_fill_value_dict[sig_pdf_key] + else: + ratio[m_zero_bkg] = (sig_prob[m_zero_bkg] / + np.finfo(np.double).resolution) + + # Check for positive inf values in the ratio and set the ratio to a + # finite number. Here we choose the maximum value of float32 to keep + # room for additional computational operations. + m_inf = np.isposinf(ratio) + ratio[m_inf] = np.finfo(np.float32).max + + return ratio + + def _calculate_ratio_and_gradients(self, tdm, fitparams, fitparams_hash): + """Calculates the ratio values and ratio gradients for all the events + given the fit parameters using the interpolation method for the fit + parameter. It caches the results. + """ + (ratio, gradients) =\ + self._interpolmethod_instance.get_value_and_gradients( + tdm, eventdata=None, params=fitparams) + + # Cache the value and the gradients. + self._cache_fitparams_hash = fitparams_hash + self._cache_ratio = ratio + self._cache_gradients = gradients + + def get_ratio(self, tdm, fitparams=None, tl=None): + """Calculates the PDF ratio values for all the events. + + Parameters + ---------- + tdm : instance of TrialDataManager + The TrialDataManager instance holding the trial data events for + which the PDF ratio values should get calculated. + fitparams : dict | None + The dictionary with the parameter name-value pairs. + It can be ``None``, if the PDF ratio does not depend on any + parameters. + tl : TimeLord instance | None + The optional TimeLord instance that should be used to measure + timing information. + + Returns + ------- + ratios : (N_events,)-shaped 1d numpy ndarray of float + The PDF ratio value for each trial event. + """ + fitparams_hash = make_params_hash(fitparams) + + # Check if the ratio value is already cached. + if self._is_cached(tdm, fitparams_hash): + return self._cache_ratio + + self._calculate_ratio_and_gradients(tdm, fitparams, fitparams_hash) + + return self._cache_ratio + + def get_gradient(self, tdm, fitparams, fitparam_name): + """Retrieves the PDF ratio gradient for the pidx'th fit parameter. + + Parameters + ---------- + tdm : instance of TrialDataManager + The TrialDataManager instance holding the trial event data for which + the PDF ratio gradient values should get calculated. + fitparams : dict + The dictionary with the fit parameter values. + fitparam_name : str + The name of the fit parameter for which the gradient should get + calculated. + """ + fitparams_hash = make_params_hash(fitparams) + + # Convert the fit parameter name into the local fit parameter index. 
+ pidx = self.convert_signal_fitparam_name_into_index(fitparam_name) + + # Check if the gradients have been calculated already. + if self._is_cached(tdm, fitparams_hash): + return self._cache_gradients[pidx] + + # The gradients have not been calculated yet. + self._calculate_ratio_and_gradients(tdm, fitparams, fitparams_hash) + + return self._cache_gradients[pidx] diff --git a/skyllh/analyses/i3/publicdata_ps/scripts/mceq_atm_bkg.py b/skyllh/analyses/i3/publicdata_ps/scripts/mceq_atm_bkg.py new file mode 100644 index 0000000000..9f59c46e63 --- /dev/null +++ b/skyllh/analyses/i3/publicdata_ps/scripts/mceq_atm_bkg.py @@ -0,0 +1,178 @@ +import argparse +import numpy as np +import os.path +import pickle + +import crflux.models as pm +import mceq_config as config +from MCEq.core import MCEqRun + +from skyllh.analyses.i3.publicdata_ps.aeff import PDAeff +from skyllh.datasets.i3 import PublicData_10y_ps + +def create_flux_file(save_path, ds): + """Creates a pickle file containing the flux for the given dataset. + """ + output_filename = ds.get_aux_data_definition('mceq_flux_datafile')[0] + output_pathfilename = '' + if save_path is None: + output_pathfilename = ds.get_abs_pathfilename_list([output_filename])[0] + else: + output_pathfilename = os.path.join( + save_path, output_filename) + + print('Output path filename: %s'%(output_pathfilename)) + + # Load the effective area instance to get the binning information. + aeff = PDAeff( + os.path.join( + ds.root_dir, + ds.get_aux_data_definition('eff_area_datafile')[0] + ) + ) + + # Setup MCeq. + config.e_min = float( + 10**(np.max([aeff._log10_enu_binedges_lower[0], 2]))) + config.e_max = float( + 10**(np.min([aeff._log10_enu_binedges_upper[-1], 9])+0.05)) + + print('E_min = %s'%(config.e_min)) + print('E_max = %s'%(config.e_max)) + + mceq = MCEqRun( + interaction_model="SIBYLL2.3c", + primary_model=(pm.HillasGaisser2012, "H3a"), + theta_deg=0.0, + density_model=("MSIS00_IC", ("SouthPole", "January")), + ) + + print('MCEq log10(e_grid) = %s'%(str(np.log10(mceq.e_grid)))) + + mag = 0 + # Use the same binning as for the effective area. 
+ # theta = delta + pi/2 + print('sin_true_dec_binedges: %s'%(str(aeff.sin_decnu_binedges))) + theta_angles_binedges = np.rad2deg( + np.arcsin(aeff.sin_decnu_binedges) + np.pi/2 + ) + theta_angles = 0.5*(theta_angles_binedges[:-1] + theta_angles_binedges[1:]) + print('Theta angles = %s'%(str(theta_angles))) + + flux_def = dict() + + all_component_names = [ + "numu_conv", + "numu_pr", + "numu_total", + "mu_conv", + "mu_pr", + "mu_total", + "nue_conv", + "nue_pr", + "nue_total", + "nutau_pr", + ] + + # Initialize empty grid + for frac in all_component_names: + flux_def[frac] = np.zeros( + (len(mceq.e_grid), len(theta_angles))) + + # fluxes calculated for different theta_angles + for ti, theta in enumerate(theta_angles): + mceq.set_theta_deg(theta) + mceq.solve() + + # same meaning of prefixes for muon neutrinos as for muons + flux_def["mu_conv"][:, ti] = ( + mceq.get_solution("conv_mu+", mag) + + mceq.get_solution("conv_mu-", mag) + ) + + flux_def["mu_pr"][:, ti] = ( + mceq.get_solution("pr_mu+", mag) + + mceq.get_solution("pr_mu-", mag) + ) + + flux_def["mu_total"][:, ti] = ( + mceq.get_solution("total_mu+", mag) + + mceq.get_solution("total_mu-", mag) + ) + + # same meaning of prefixes for muon neutrinos as for muons + flux_def["numu_conv"][:, ti] = ( + mceq.get_solution("conv_numu", mag) + + mceq.get_solution("conv_antinumu", mag) + ) + + flux_def["numu_pr"][:, ti] = ( + mceq.get_solution("pr_numu", mag) + + mceq.get_solution("pr_antinumu", mag) + ) + + flux_def["numu_total"][:, ti] = ( + mceq.get_solution("total_numu", mag) + + mceq.get_solution("total_antinumu", mag) + ) + + # same meaning of prefixes for electron neutrinos as for muons + flux_def["nue_conv"][:, ti] = ( + mceq.get_solution("conv_nue", mag) + + mceq.get_solution("conv_antinue", mag) + ) + + flux_def["nue_pr"][:, ti] = ( + mceq.get_solution("pr_nue", mag) + + mceq.get_solution("pr_antinue", mag) + ) + + flux_def["nue_total"][:, ti] = ( + mceq.get_solution("total_nue", mag) + + mceq.get_solution("total_antinue", mag) + ) + + # since there are no conventional tau neutrinos, prompt=total + flux_def["nutau_pr"][:, ti] = ( + mceq.get_solution("total_nutau", mag) + + mceq.get_solution("total_antinutau", mag) + ) + print("\U0001F973") + + # Save the result to the output file. + with open(output_pathfilename, 'wb') as f: + pickle.dump(((mceq.e_grid, theta_angles_binedges), flux_def), f) + print('Saved fluxes for dataset %s to: %s'%(ds.name, output_pathfilename)) + +#------------------------------------------------------------------------------- + +if __name__ == '__main__': + + parser = argparse.ArgumentParser( + description='Generate atmospheric background fluxes with MCEq.' + ) + parser.add_argument( + '-b', + '--data-base-path', + type=str, + default='/data/ana/analyses', + help='The base path of the data repository.' 
+ ) + parser.add_argument( + '-s', + '--save-path', + type=str, + default=None + ) + + args = parser.parse_args() + + dsc = PublicData_10y_ps.create_dataset_collection(args.data_base_path) + + dataset_names = ['IC40', 'IC59', 'IC79', 'IC86_I', 'IC86_II'] + for ds_name in dataset_names: + ds = dsc.get_dataset(ds_name) + create_flux_file( + save_path = args.save_path, + ds=ds + ) diff --git a/skyllh/analyses/i3/publicdata_ps/signal_generator.py b/skyllh/analyses/i3/publicdata_ps/signal_generator.py new file mode 100644 index 0000000000..a36ac2a673 --- /dev/null +++ b/skyllh/analyses/i3/publicdata_ps/signal_generator.py @@ -0,0 +1,658 @@ +# -*- coding: utf-8 -*- + +import numpy as np +from scipy import interpolate +import scipy.stats + +from skyllh.core.py import ( + issequenceof, + float_cast, + int_cast +) +from skyllh.core.py import module_classname +from skyllh.core.debugging import get_logger +from skyllh.core.signal_generator import SignalGeneratorBase +from skyllh.core.llhratio import LLHRatio +from skyllh.core.dataset import Dataset +from skyllh.core.source_hypothesis import SourceHypoGroupManager +from skyllh.core.storage import DataFieldRecordArray + +from skyllh.analyses.i3.publicdata_ps.utils import psi_to_dec_and_ra +from skyllh.analyses.i3.publicdata_ps.smearing_matrix import ( + PDSmearingMatrix +) +from skyllh.analyses.i3.publicdata_ps.aeff import PDAeff + + +class PDDatasetSignalGenerator(object): + """This class provides a signal generation method for a point-like source + seen in the IceCube detector using one dataset of the 10 years public data + release. It is used by the PDSignalGenerator class in a loop over all the + datasets that have been added to the analysis. + """ + + def __init__(self, ds, src_dec, effA=None, sm=None, **kwargs): + """Creates a new instance of the signal generator for generating + signal events from a specific public data dataset. + + Parameters: + ----------- + ds : Dataset instance + Dataset instance for which signal events should get + generated for. + src_dec : float + The declination of the source in radians. + effA : PDAeff | None + Representation of the effective area provided by the public data. + sm : PDSmearingMatrix | None + Representation of the smearing matrix provided by the public data. + """ + super().__init__(**kwargs) + + self._logger = get_logger(module_classname(self)) + + if sm is None: + self.smearing_matrix = PDSmearingMatrix( + pathfilenames=ds.get_abs_pathfilename_list( + ds.get_aux_data_definition('smearing_datafile'))) + else: + self.smearing_matrix = sm + + if effA is None: + dec_idx = self.smearing_matrix.get_true_dec_idx(src_dec) + (min_log_true_e, + max_log_true_e) = \ + self.smearing_matrix.get_true_log_e_range_with_valid_log_e_pdfs( + dec_idx) + kwargs = { + 'src_dec': src_dec, + 'min_log10enu': min_log_true_e, + 'max_log10enu': max_log_true_e + } + self.effA = PDAeff( + pathfilenames=ds.get_abs_pathfilename_list( + ds.get_aux_data_definition('eff_area_datafile')), + **kwargs) + + else: + self.effA = effA + + def _generate_inv_cdf_spline(self, flux_model, log_e_min, + log_e_max): + """Sample the true neutrino energy from the power-law + re-weighted with the detection probability. + """ + m = (self.effA.log10_enu_bincenters >= log_e_min) & ( + self.effA.log10_enu_bincenters < log_e_max) + bin_centers = self.effA.log10_enu_bincenters[m] + low_bin_edges = self.effA._log10_enu_binedges_lower[m] + high_bin_edges = self.effA._log10_enu_binedges_upper[m] + + # Flux probability P(E_nu | gamma) per bin. 
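+        # P(E_bin|gamma) is the integral of the flux model over each energy
+        # bin, normalized by the integral over the full selected range.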
+        flux_prob = flux_model.get_integral(
+            10**low_bin_edges, 10**high_bin_edges
+        ) / flux_model.get_integral(
+            10**low_bin_edges[0], 10**high_bin_edges[-1]
+        )
+
+        # Take the product with the detection probability and normalize
+        # again to a probability per bin.
+        product = flux_prob * self.effA.det_prob
+        prob_per_bin = product / np.sum(product)
+
+        # The probability per bin cannot be zero, otherwise the cumulative
+        # sum would not be monotonically increasing. So we set zero bins to
+        # a value 1000 times smaller than the smallest non-zero bin.
+        m = prob_per_bin == 0
+        prob_per_bin[m] = np.min(prob_per_bin[np.invert(m)]) / 1000
+        to_keep = np.where(prob_per_bin > 1e-15)[0]  # For numerical stability
+        prob_per_bin = prob_per_bin[to_keep]
+        prob_per_bin /= np.sum(prob_per_bin)
+
+        # Compute the cumulative distribution CDF.
+        cum_per_bin = [np.sum(prob_per_bin[:i])
+                       for i in range(prob_per_bin.size+1)]
+        if np.any(np.diff(cum_per_bin) == 0):
+            raise ValueError(
+                'The cumulative sum of the true energy probability is not '
+                'monotonically increasing! Values of the cumsum are '
+                f'{cum_per_bin}.')
+
+        bin_centers = bin_centers[to_keep]
+        bin_centers = np.concatenate(([low_bin_edges[0]], bin_centers))
+
+        # Build a spline for the inverse CDF.
+        return interpolate.splrep(cum_per_bin, bin_centers, k=1, s=0)
+
+    @staticmethod
+    def _eval_spline(x, spl):
+        x = np.asarray(x)
+        # Check element-wise that all values are within the valid range.
+        if np.any(x < 0) or np.any(x > 1):
+            raise ValueError(
+                f'{x} is outside of the valid spline range. '
+                'The valid range is [0,1].')
+        values = interpolate.splev(x, spl, ext=3)
+        return values
+
+    def _generate_events(
+            self, rss, src_dec, src_ra, dec_idx,
+            log_true_e_inv_cdf_spl, n_events):
+        """Generates `n_events` signal events for the given source location
+        and flux model.
+
+        Note:
+            Some values can be NaN in cases where a PDF was not available!
+
+        Parameters
+        ----------
+        rss : instance of RandomStateService
+            The instance of RandomStateService to use for drawing random
+            numbers.
+        src_dec : float
+            The declination of the source in radians.
+        src_ra : float
+            The right-ascension of the source in radians.
+        dec_idx : int
+            The index of the true declination bin of the source within the
+            smearing matrix.
+        log_true_e_inv_cdf_spl : tuple
+            The spline representation of the inverse CDF of the true energy
+            probability distribution, as created by
+            :meth:`_generate_inv_cdf_spline`.
+        n_events : int
+            The number of signal events to generate.
+
+        Returns
+        -------
+        events : instance of DataFieldRecordArray of size `n_events`
+            The DataFieldRecordArray instance holding the event data.
+            It contains the following data fields:
+            - 'isvalid'
+            - 'log_true_energy'
+            - 'log_energy'
+            - 'sin_dec'
+            Single values can be NaN in cases where a pdf was not available.
+        """
+
+        # Create the output event DataFieldRecordArray.
+        out_dtype = [
+            ('isvalid', np.bool_),
+            ('log_true_energy', np.double),
+            ('log_energy', np.double),
+            ('dec', np.double),
+            ('ra', np.double),
+            ('sin_dec', np.double),
+            ('ang_err', np.double),
+            ('time', int),
+            ('azi', np.double),
+            ('zen', np.double),
+            ('run', int)
+        ]
+
+        data = dict(
+            [(out_dt[0], np.empty(
+                (n_events,),
+                dtype=out_dt[1])
+              ) for out_dt in out_dtype]
+        )
+
+        events = DataFieldRecordArray(data, copy=False)
+
+        sm = self.smearing_matrix
+
+        log_true_e = self._eval_spline(
+            rss.random.uniform(size=n_events), log_true_e_inv_cdf_spl)
+
+        events['log_true_energy'] = log_true_e
+
+        log_true_e_idxs = (
+            np.digitize(log_true_e, bins=sm.true_e_bin_edges) - 1
+        )
+
+        # Sample reconstructed energies given true neutrino energies.
+        (log_e_idxs, log_e) = sm.sample_log_e(rss, dec_idx, log_true_e_idxs)
+        events['log_energy'] = log_e
+
+        # Sample reconstructed psi values given true neutrino energy and
+        # reconstructed energy.
+ (psi_idxs, psi) = sm.sample_psi( + rss, dec_idx, log_true_e_idxs, log_e_idxs) + + # Sample reconstructed ang_err values given true neutrino energy, + # reconstructed energy, and psi. + (ang_err_idxs, ang_err) = sm.sample_ang_err( + rss, dec_idx, log_true_e_idxs, log_e_idxs, psi_idxs) + + isvalid = np.invert( + np.isnan(log_e) | np.isnan(psi) | np.isnan(ang_err)) + events['isvalid'] = isvalid + + # Convert the psf into a set of (r.a. and dec.). Only use non-nan + # values. + (dec, ra) = psi_to_dec_and_ra(rss, src_dec, src_ra, psi[isvalid]) + events['ra'][isvalid] = ra + events['dec'][isvalid] = dec + events['sin_dec'][isvalid] = np.sin(dec) + + # Add an angular error. Only use non-nan values. + events['ang_err'][isvalid] = ang_err[isvalid] + + # Add fields required by the framework + events['time'] = np.ones(n_events) + events['azi'] = np.ones(n_events) + events['zen'] = np.ones(n_events) + events['run'] = -1 * np.ones(n_events) + + return events + + @staticmethod + @np.vectorize + def energy_filter(events, spline, cut_sindec, logger): + """The energy filter will select all events below `cut_sindec` + that have an energy smaller than the energy spline at their + declination. + + Paramters + --------- + events : numpy record array + Numpy record array with the generated signal events. + energy_cut_splines : scipy.interpolate.UnivariateSpline + A spline of E(sin_dec) that defines the declination + dependent energy cut in the IceCube southern sky. + cut_sindec : float + The sine of the declination to start applying the energy cut. + The cut will be applied from this declination down. + logger : logging.Logger + The Logger instance. + + Returns + energy_filter : (len(events),)-shaped numpy ndarray + A mask of shape `len(events)` of the events to be cut. + """ + if cut_sindec is None: + logger.warn( + 'No `cut_sindec` has been specified. The energy cut will be ' + 'applied in [-90, 0] deg.') + cut_sindec = 0. + energy_filter = np.logical_and( + events['sin_dec'] < cut_sindec, + events['log_energy'] < spline(events['sin_dec'])) + + return energy_filter + + def generate_signal_events( + self, rss, src_dec, src_ra, flux_model, n_events, + energy_cut_spline=None, cut_sindec=None): + """Generates ``n_events`` signal events for the given source location + and flux model. + + Paramters + --------- + rss : RandomStateService + src_dec : float + Declination coordinate of the injection point. + src_ra : float + Right ascension coordinate of the injection point. + flux_model : FluxModel + Instance of the `FluxModel` class. + n_events : int + Number of signal events to be generated. + energy_cut_splines : scipy.interpolate.UnivariateSpline + A spline of E(sin_dec) that defines the declination + dependent energy cut in the IceCube southern sky. + cut_sindec : float + The sine of the declination to start applying the energy cut. + The cut will be applied from this declination down. + + Returns + ------- + events : numpy record array + The numpy record array holding the event data. + It contains the following data fields: + - 'isvalid' + - 'log_true_energy' + - 'log_energy' + - 'dec' + - 'ra' + - 'ang_err' + """ + sm = self.smearing_matrix + + # Find the declination bin index. + dec_idx = sm.get_true_dec_idx(src_dec) + + # Determine the true energy range for which log_e PDFs are available. + (min_log_true_e, + max_log_true_e) = sm.get_true_log_e_range_with_valid_log_e_pdfs( + dec_idx) + # Build the spline for the inverse CDF and draw a true neutrino + # energy from the hypothesis spectrum. 
+ log_true_e_inv_cdf_spl = self._generate_inv_cdf_spline( + flux_model, min_log_true_e, max_log_true_e) + + events = None + n_evt_generated = 0 + while n_evt_generated != n_events: + n_evt = n_events - n_evt_generated + + events_ = self._generate_events( + rss, src_dec, src_ra, dec_idx, log_true_e_inv_cdf_spl, n_evt) + + # Cut events that failed to be generated due to missing PDFs. + # Also cut low energy events if generating in the southern sky. + events_ = events_[events_['isvalid']] + if energy_cut_spline is not None: + to_cut = self.energy_filter( + events_, energy_cut_spline, cut_sindec, self._logger) + events_ = events_[~to_cut] + if not len(events_) == 0: + n_evt_generated += len(events_) + if events is None: + events = events_ + else: + events.append(events_) + + return events + + +class PDSignalGenerator(SignalGeneratorBase): + """This class provides a signal generation method for a point-like source + seen in the IceCube detector using the 10 years public data release. + """ + + def __init__(self, src_hypo_group_manager, dataset_list, data_list=None, + llhratio=None, energy_cut_splines=None, cut_sindec=None): + """Constructs a new signal generator instance. + + Parameters + ---------- + src_hypo_group_manager : SourceHypoGroupManager instance + The SourceHypoGroupManager instance defining the source hypothesis + groups. + dataset_list : list of Dataset instances + The list of Dataset instances for which signal events should get + generated for. + data_list : list of DatasetData instances + The list of DatasetData instances holding the actual data of each + dataset. The order must match the order of ``dataset_list``. + llhratio : LLHRatio + The likelihood ratio object contains the datasets signal weights + needed for distributing the event generation among the different + datasets. + energy_cut_splines : list of UnivariateSpline + A list of splines of E(sin_dec) used to define the declination + dependent energy cut in the IceCube southern sky. + cut_sindec : list of float + The sine of the declination to start applying the energy cut. + The cut will be applied from this declination down. + """ + self.src_hypo_group_manager = src_hypo_group_manager + self.dataset_list = dataset_list + self.data_list = data_list + self.llhratio = llhratio + self.effA = [None] * len(self._dataset_list) + self.sm = [None] * len(self._dataset_list) + self.splines = energy_cut_splines + self.cut_sindec = cut_sindec + + @property + def src_hypo_group_manager(self): + """The SourceHypoGroupManager instance defining the source groups with + their spectra. + """ + return self._src_hypo_group_manager + + @src_hypo_group_manager.setter + def src_hypo_group_manager(self, manager): + if(not isinstance(manager, SourceHypoGroupManager)): + raise TypeError('The src_hypo_group_manager property must be an ' + 'instance of SourceHypoGroupManager!') + self._src_hypo_group_manager = manager + + @property + def dataset_list(self): + """The list of Dataset instances for which signal events should get + generated for. + """ + return self._dataset_list + + @dataset_list.setter + def dataset_list(self, datasets): + if(not issequenceof(datasets, Dataset)): + raise TypeError('The dataset_list property must be a sequence of ' + 'Dataset instances!') + self._dataset_list = list(datasets) + + @property + def llhratio(self): + """The log-likelihood ratio function for the analysis. 
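The top-up loop in ``generate_signal_events`` above keeps regenerating until exactly ``n_events`` valid events have accumulated, since some generated events are dropped as invalid or cut away. A minimal standalone sketch of the pattern, with a toy generator standing in for ``_generate_events``:

```python
import numpy as np

rng = np.random.default_rng(42)

def generate_batch(n):
    """Toy stand-in for _generate_events(): ~20% of the events come out
    invalid (NaN), e.g. because no smearing PDF was available."""
    x = rng.normal(size=n)
    x[rng.random(n) < 0.2] = np.nan
    return x

# Top up until exactly n_events valid events have been accumulated.
n_events = 100
events = np.empty(0)
while events.size != n_events:
    batch = generate_batch(n_events - events.size)
    events = np.concatenate((events, batch[~np.isnan(batch)]))
```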
+ """ + return self._llhratio + + @llhratio.setter + def llhratio(self, llhratio): + if llhratio is not None: + if(not isinstance(llhratio, LLHRatio)): + raise TypeError('The llratio property must be an instance of ' + 'LLHRatio!') + self._llhratio = llhratio + + def generate_signal_events(self, rss, mean, poisson=True): + shg_list = self._src_hypo_group_manager.src_hypo_group_list + # Only supports a single source hypothesis group. Raise an error + # if more than one shg is in the source hypo group manager. + if len(shg_list) > 1: + raise RuntimeError( + 'Signal injection for multiple source hypothesis groups is ' + 'not supported yet.') + + tot_n_events = 0 + signal_events_dict = {} + + for shg in shg_list: + # Only supports single point source signal injection. Raise + # an error if more than one source is in the source hypo group. + if len(shg.source_list) > 1: + raise RuntimeError( + 'Signal injection for multiple sources within a source ' + 'hypothesis group is not supported yet.') + # This only works with power-laws for now. + # Each source hypo group can have a different power-law + gamma = shg.fluxmodel.gamma + weights, _ = self.llhratio.dataset_signal_weights([mean, gamma]) + for (ds_idx, w) in enumerate(weights): + w_mean = mean * w + if(poisson): + n_events = rss.random.poisson( + float_cast( + w_mean, + '`mean` must be castable to type of float!' + ) + ) + else: + n_events = int_cast( + w_mean, + '`mean` must be castable to type of int!' + ) + tot_n_events += n_events + + events_ = None + for (shg_src_idx, src) in enumerate(shg.source_list): + ds = self._dataset_list[ds_idx] + sig_gen = PDDatasetSignalGenerator( + ds, src.dec, self.effA[ds_idx], self.sm[ds_idx]) + if self.effA[ds_idx] is None: + self.effA[ds_idx] = sig_gen.effA + if self.sm[ds_idx] is None: + self.sm[ds_idx] = sig_gen.smearing_matrix + # ToDo: here n_events should be split according to some + # source weight + events_ = sig_gen.generate_signal_events( + rss, + src.dec, + src.ra, + shg.fluxmodel, + n_events, + energy_cut_spline=self.splines[ds_idx], + cut_sindec=self.cut_sindec[ds_idx] + ) + if events_ is None: + continue + + if shg_src_idx == 0: + signal_events_dict[ds_idx] = events_ + else: + signal_events_dict[ds_idx].append(events_) + + return tot_n_events, signal_events_dict + + +class PDTimeDependentSignalGenerator(PDSignalGenerator): + """ The time dependent signal generator works so far only for one single + dataset. For multi datasets one needs to adjust the dataset weights + accordingly (scaling of the effective area with livetime of the flare in + the dataset). + """ + + def __init__(self, src_hypo_group_manager, dataset_list, data_list=None, + llhratio=None, energy_cut_splines=None, cut_sindec=None, + gauss=None, box=None): + """ + Parameters + ---------- + src_hypo_group_manager : SourceHypoGroupManager instance + The instance of SourceHypoGroupManager that defines the list of + sources, i.e. the list of SourceModel instances. + dataset_list : list of Dataset instances + The list of Dataset instances for which signal events should get + generated for. + data_list : list of DatasetData instances + The list of DatasetData instances holding the actual data of each + dataset. The order must match the order of ``dataset_list``. + llhratio : LLHRatio + The likelihood ratio object contains the datasets signal weights + needed for distributing the event generation among the different + datsets. 
+ energy_cut_splines : list of UnivariateSpline + cut_sindec : float + gauss : dict | None + None or dictionary with {"mu": float, "sigma": float}. + box : dict | None + None or dictionary with {"start": float, "end": float}. + """ + if gauss is None and box is None: + raise ValueError( + "Either box or gauss keywords must define the neutrino flare.") + if gauss is not None and box is not None: + raise ValueError( + "Either box or gauss keywords must define the neutrino flare, " + "cannot use both.") + + super().__init__(src_hypo_group_manager, dataset_list, data_list, + llhratio, energy_cut_splines, cut_sindec) + self.box = box + self.gauss = gauss + + self.time_pdf = self._get_time_pdf() + + def _get_time_pdf(self): + """Get the neutrino flare time pdf given parameters. + Will be used to generate random numbers by calling `rvs()` method. + + Returns + ------- + time_pdf : instance of scipy.stats.rv_continuous base class + Has to base scipy.stats.rv_continuous. + """ + # Make sure flare is in dataset. + for data_list in self.data_list: + grl = data_list.grl + + if self.gauss is not None: + if (self.gauss["mu"] - 4 * self.gauss["sigma"] > grl["stop"][-1]) or ( + self.gauss["mu"] + 4 * self.gauss["sigma"] < grl["start"][0]): + raise ValueError( + f"Gaussian {str(self.gauss)} flare is not in dataset.") + + if self.box is not None: + if (self.box["start"] > grl["stop"][-1]) or ( + self.box["end"] < grl["start"][0]): + raise ValueError( + f"Box {str(self.box)} flare is not in dataset.") + + # Create `time_pdf`. + if self.gauss is not None: + time_pdf = scipy.stats.norm(self.gauss["mu"], self.gauss["sigma"]) + if self.box is not None: + time_pdf = scipy.stats.uniform( + self.box["start"], + self.box["end"] - self.box["start"] + ) + + return time_pdf + + def set_flare(self, gauss=None, box=None): + """Set the neutrino flare given parameters. + + Parameters + ---------- + gauss : dict | None + None or dictionary with {"mu": float, "sigma": float}. + box : dict | None + None or dictionary with {"start": float, "end": float}. + """ + if gauss is None and box is None: + raise ValueError( + "Either box or gauss keywords must define the neutrino flare.") + if gauss is not None and box is not None: + raise ValueError( + "Either box or gauss keywords must define the neutrino flare, " + "cannot use both.") + + self.box = box + self.gauss = gauss + + self.time_pdf = self._get_time_pdf() + + def is_in_grl(self, time, grl): + """Helper function to check if given times are in the grl ontime. + + Parameters + ---------- + time : 1d ndarray + Time values. + grl : ndarray + Array of the detector good run list. + + Returns + ------- + is_in_grl : 1d ndarray + Boolean mask of `time` in grl ontime. + """ + def f(time, grl): + return np.any((grl["start"] <= time) & (time <= grl["stop"])) + + # Vectorize `f`, but exclude `grl` argument from vectorization. + # This is needed to support `time` as an array argument. + f_v = np.vectorize(f, excluded=[1]) + is_in_grl = f_v(time, grl) + + return is_in_grl + + def generate_signal_events(self, rss, mean, poisson=True): + """Same as in PDSignalGenerator, but we assign times here. + """ + # Call method from the parent class to generate signal events. + (tot_n_events, signal_events_dict) = super().generate_signal_events( + rss, mean, poisson=poisson) + + # Assign times for flare. We can also use inverse transform + # sampling instead of the lazy version implemented here. 
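``_get_time_pdf`` above maps the ``gauss`` and ``box`` dictionaries onto frozen ``scipy.stats`` distributions whose ``rvs()`` method is later used to draw flare times. A standalone sketch with hypothetical flare parameters in MJD:

```python
import numpy as np
import scipy.stats

# Hypothetical flare definitions in MJD.
gauss = {"mu": 57000.0, "sigma": 30.0}
box = {"start": 56990.0, "end": 57010.0}

# Gaussian flare: scipy.stats.norm(loc, scale).
time_pdf = scipy.stats.norm(gauss["mu"], gauss["sigma"])

# Box flare: scipy.stats.uniform(loc, scale) with scale = end - start.
# time_pdf = scipy.stats.uniform(box["start"], box["end"] - box["start"])

times = time_pdf.rvs(5, random_state=np.random.RandomState(1))
```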
+ for (ds_idx, events_) in signal_events_dict.items(): + grl = self.data_list[ds_idx].grl + + # Optimized time injection version, based on csky implementation. + # https://github.com/icecube/csky/blob/7e969639c5ef6dbb42872dac9b761e1e8b0ccbe2/csky/inj.py#L1122 + times = np.array([]) + n_events = len(events_) + while len(times) < n_events: + times = np.concatenate( + (times, self.time_pdf.rvs(n_events - len(times), + random_state=rss.random)) + ) + # Check if times is in grl. + is_in_grl_mask = self.is_in_grl(times, grl) + times = times[is_in_grl_mask] + + events_["time"] = times + return tot_n_events, signal_events_dict diff --git a/skyllh/analyses/i3/publicdata_ps/signalpdf.py b/skyllh/analyses/i3/publicdata_ps/signalpdf.py new file mode 100644 index 0000000000..474e2a976c --- /dev/null +++ b/skyllh/analyses/i3/publicdata_ps/signalpdf.py @@ -0,0 +1,421 @@ +# -*- coding: utf-8 -*- + +import numpy as np +from scipy import integrate + +from skyllh.core.py import module_classname +from skyllh.core.debugging import get_logger +from skyllh.core.timing import TaskTimer +from skyllh.core.binning import get_bincenters_from_binedges +from skyllh.core.pdf import ( + PDF, + PDFAxis, + PDFSet, + IsSignalPDF, +) +from skyllh.core.multiproc import ( + IsParallelizable, + parallelize +) +from skyllh.core.parameters import ( + ParameterGrid, + ParameterGridSet +) +from skyllh.i3.dataset import I3Dataset +from skyllh.physics.flux import FluxModel + +from skyllh.analyses.i3.publicdata_ps.aeff import PDAeff +from skyllh.analyses.i3.publicdata_ps.utils import ( + FctSpline1D, +) +from skyllh.analyses.i3.publicdata_ps.smearing_matrix import ( + PDSmearingMatrix +) + + +class PDSignalEnergyPDF(PDF, IsSignalPDF): + """This class provides a signal energy PDF for a spectrial index value. + """ + def __init__( + self, f_e_spl, **kwargs): + """Creates a new signal energy PDF instance for a particular spectral + index value. + + Parameters + ---------- + f_e_spl : FctSpline1D instance + The FctSpline1D instance representing the spline of the energy PDF. + """ + super().__init__(**kwargs) + + if not isinstance(f_e_spl, FctSpline1D): + raise TypeError( + 'The f_e_spl argument must be an instance of FctSpline1D!') + + self.f_e_spl = f_e_spl + + self.log10_reco_e_lower_binedges = self.f_e_spl.x_binedges[:-1] + self.log10_reco_e_upper_binedges = self.f_e_spl.x_binedges[1:] + + self.log10_reco_e_min = self.log10_reco_e_lower_binedges[0] + self.log10_reco_e_max = self.log10_reco_e_upper_binedges[-1] + + # Add the PDF axes. + self.add_axis(PDFAxis( + name='log_energy', + vmin=self.log10_reco_e_min, + vmax=self.log10_reco_e_max) + ) + + # Check integrity. + integral = integrate.quad( + self.f_e_spl.evaluate, + self.log10_reco_e_min, + self.log10_reco_e_max, + limit=200, + full_output=1 + )[0] / self.f_e_spl.norm + if not np.isclose(integral, 1): + raise ValueError( + 'The integral over log10_reco_e of the energy term must be ' + 'unity! But it is {}!'.format(integral)) + + def assert_is_valid_for_trial_data(self, tdm): + pass + + def get_pd_by_log10_reco_e(self, log10_reco_e, tl=None): + """Calculates the probability density for the given log10(E_reco/GeV) + values using the spline representation of the PDF. + + Parameters + ---------- + log10_reco_e : (n_log10_reco_e,)-shaped 1D numpy ndarray + The numpy ndarray holding the log10(E_reco/GeV) values for which + the energy PDF should get evaluated. + tl : TimeLord instance | None + The optional TimeLord instance that should be used to measure + timing information. 
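The time-assignment loop above rejects flare times that fall outside the detector's good-run list and keeps drawing until every event has a valid time. The same pattern in a self-contained toy, with a hypothetical two-interval GRL:

```python
import numpy as np

rng = np.random.default_rng(2)

# Toy good-run list with two on-time intervals (MJD).
grl = np.array([(56990.0, 57000.0), (57005.0, 57020.0)],
               dtype=[("start", "f8"), ("stop", "f8")])

def is_in_grl(time, grl):
    """True where `time` falls inside any on-time interval."""
    return np.array([np.any((grl["start"] <= t) & (t <= grl["stop"]))
                     for t in time])

# Keep drawing flare times until enough fall inside the on-time intervals.
n_events = 100
times = np.array([])
while len(times) < n_events:
    new = rng.normal(57000.0, 10.0, size=n_events - len(times))
    times = np.concatenate((times, new[is_in_grl(new, grl)]))
```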
+ + Returns + ------- + pd : (N_events,)-shaped numpy ndarray + The 1D numpy ndarray with the probability density for each event. + """ + # Select events that actually have a signal energy PDF. + # All other events will get zero signal probability density. + m = ( + (log10_reco_e >= self.log10_reco_e_min) & + (log10_reco_e < self.log10_reco_e_max) + ) + + with TaskTimer(tl, 'Evaluate PDSignalEnergyPDF'): + pd = np.zeros((len(log10_reco_e),), dtype=np.double) + pd[m] = self.f_e_spl(log10_reco_e[m]) / self.f_e_spl.norm + + return pd + + def get_prob(self, tdm, params=None, tl=None): + """Calculates the probability density for the events given by the + TrialDataManager. + + Parameters + ---------- + tdm : TrialDataManager instance + The TrialDataManager instance holding the data events for which the + probability should be looked up. The following data fields are + required: + - 'log_energy' + The log10 of the reconstructed energy. + params : dict | None + The dictionary containing the parameter names and values for which + the probability should get calculated. + By definition this PDF does not depend on parameters. + tl : TimeLord instance | None + The optional TimeLord instance that should be used to measure + timing information. + + Returns + ------- + pd : (N_events,)-shaped numpy ndarray + The 1D numpy ndarray with the probability density for each event. + grads : (N_fitparams,N_events)-shaped ndarray | None + The 2D numpy ndarray holding the gradients of the PDF w.r.t. + each fit parameter for each event. The order of the gradients + is the same as the order of floating parameters specified through + the ``param_set`` property. + It is ``None``, if this PDF does not depend on any parameters. + """ + log10_reco_e = tdm.get_data('log_energy') + + pd = self.get_pd_by_log10_reco_e(log10_reco_e, tl=tl) + + return (pd, None) + + +class PDSignalEnergyPDFSet(PDFSet, IsSignalPDF, IsParallelizable): + """This class provides a signal energy PDF set for the public data. + It creates a set of PDSignalEnergyPDF instances, one for each spectral + index value on a grid. + """ + def __init__( + self, + ds, + src_dec, + flux_model, + fitparam_grid_set, + ncpu=None, + ppbar=None, + **kwargs): + """Creates a new PDSignalEnergyPDFSet instance for the public data. + + Parameters + ---------- + ds : I3Dataset instance + The I3Dataset instance that defines the dataset of the public data. + src_dec : float + The declination of the source in radians. + flux_model : FluxModel instance + The FluxModel instance that defines the source's flux model. + fitparam_grid_set : ParameterGrid | ParameterGridSet instance + The parameter grid set defining the grids of the fit parameters. + ncpu : int | None + The number of CPUs to utilize. Global setting will take place if + not specified, i.e. set to None. + ppbar : ProgressBar instance | None + The instance of ProgressBar for the optional parent progress bar. + """ + self._logger = get_logger(module_classname(self)) + + # Check for the correct types of the arguments. 
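``get_pd_by_log10_reco_e`` above evaluates the spline only inside its support and assigns zero density everywhere else. A self-contained sketch of that masking pattern, with a hypothetical (unnormalized) spline:

```python
import numpy as np
from scipy import interpolate

# Hypothetical spline-backed energy PDF with support [2, 8) in
# log10(E_reco/GeV); outside the support the density is defined as zero.
xs = np.linspace(2.0, 8.0, 25)
spl = interpolate.splrep(xs, np.exp(-0.5 * (xs - 5.0)**2), k=1, s=0)

log10_reco_e = np.array([1.5, 3.0, 5.0, 9.0])
pd = np.zeros_like(log10_reco_e)
m = (log10_reco_e >= 2.0) & (log10_reco_e < 8.0)
pd[m] = interpolate.splev(log10_reco_e[m], spl)
```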
+ if not isinstance(ds, I3Dataset): + raise TypeError( + 'The ds argument must be an instance of I3Dataset!') + + if not isinstance(flux_model, FluxModel): + raise TypeError( + 'The flux_model argument must be an instance of FluxModel!') + + if (not isinstance(fitparam_grid_set, ParameterGrid)) and\ + (not isinstance(fitparam_grid_set, ParameterGridSet)): + raise TypeError( + 'The fitparam_grid_set argument must be an instance of type ' + 'ParameterGrid or ParameterGridSet!') + + # Extend the fitparam_grid_set to allow for parameter interpolation + # values at the grid edges. + fitparam_grid_set = fitparam_grid_set.copy() + fitparam_grid_set.add_extra_lower_and_upper_bin() + + super().__init__( + pdf_type=PDF, + fitparams_grid_set=fitparam_grid_set, + ncpu=ncpu + ) + + # Load the smearing matrix. + sm = PDSmearingMatrix( + pathfilenames=ds.get_abs_pathfilename_list( + ds.get_aux_data_definition('smearing_datafile'))) + + # Select the slice of the smearing matrix corresponding to the + # source declination band. + true_dec_idx = sm.get_true_dec_idx(src_dec) + sm_pdf = sm.pdf[:, true_dec_idx] + + # Only look at true neutrino energies for which a recostructed + # muon energy distribution exists in the smearing matrix. + (min_log_true_e, + max_log_true_e) = sm.get_true_log_e_range_with_valid_log_e_pdfs( + true_dec_idx) + log_true_e_mask = np.logical_and( + sm.log10_true_enu_binedges >= min_log_true_e, + sm.log10_true_enu_binedges <= max_log_true_e) + true_enu_binedges = np.power( + 10, sm.log10_true_enu_binedges[log_true_e_mask]) + true_enu_binedges_lower = true_enu_binedges[:-1] + true_enu_binedges_upper = true_enu_binedges[1:] + valid_true_e_idxs = [sm.get_log10_true_e_idx(0.5 * (he + le)) + for he,le in zip( + sm.log10_true_enu_binedges[log_true_e_mask][1:], + sm.log10_true_enu_binedges[log_true_e_mask][:-1]) + ] + + xvals_binedges = ds.get_binning_definition('log_energy').binedges + xvals = get_bincenters_from_binedges(xvals_binedges) + + # Calculate the neutrino enegry bin widths in GeV. + d_enu = np.diff(true_enu_binedges) + self._logger.debug( + 'dE_nu = {}'.format(d_enu) + ) + + # Load the effective area. + aeff = PDAeff( + pathfilenames=ds.get_abs_pathfilename_list( + ds.get_aux_data_definition('eff_area_datafile'))) + + # Calculate the probability to detect a neutrino of energy + # E_nu given a neutrino declination: p(E_nu|dec). + det_prob = aeff.get_detection_prob_for_decnu( + decnu=src_dec, + enu_min=true_enu_binedges[:-1], + enu_max=true_enu_binedges[1:], + enu_range_min=true_enu_binedges[0], + enu_range_max=true_enu_binedges[-1] + ) + + self._logger.debug('det_prob = {}, sum = {}'.format( + det_prob, np.sum(det_prob))) + + if not np.isclose(np.sum(det_prob), 1): + self._logger.warn( + 'The sum of the detection probabilities is not unity! It is ' + '{}.'.format(np.sum(det_prob))) + + psi_edges_bw = sm.psi_upper_edges - sm.psi_lower_edges + ang_err_bw = sm.ang_err_upper_edges - sm.ang_err_lower_edges + + # Create the energy pdf for different gamma values. + def create_energy_pdf(sm_pdf, flux_model, gridfitparams): + """Creates an energy pdf for a specific gamma value. + """ + # Create a copy of the FluxModel with the given flux parameters. + # The copy is needed to not interfer with other CPU processes. + my_flux_model = flux_model.copy(newprop=gridfitparams) + + self._logger.debug( + 'Generate signal energy PDF for parameters {} in {} E_nu ' + 'bins.'.format( + gridfitparams, len(valid_true_e_idxs)) + ) + + # Calculate the flux probability p(E_nu|gamma). 
+ flux_prob = ( + my_flux_model.get_integral( + true_enu_binedges_lower, + true_enu_binedges_upper + ) / + my_flux_model.get_integral( + true_enu_binedges[0], + true_enu_binedges[-1] + ) + ) + if not np.isclose(np.sum(flux_prob), 1): + self._logger.warn( + 'The sum of the flux probabilities is not unity! It is ' + '{}.'.format(np.sum(flux_prob))) + + self._logger.debug( + 'flux_prob = {}, sum = {}'.format( + flux_prob, np.sum(flux_prob)) + ) + + p = flux_prob * det_prob + + true_e_prob = p / np.sum(p) + + self._logger.debug( + 'true_e_prob = {}'.format( + true_e_prob)) + + def create_reco_e_pdf_for_true_e(idx, true_e_idx): + """This functions creates a spline for the reco energy + distribution given a true neutrino engery. + """ + # Create the energy PDF f_e = P(log10_E_reco|dec) = + # \int dPsi dang_err P(E_reco,Psi,ang_err). + f_e = np.sum( + sm_pdf[true_e_idx] * + psi_edges_bw[true_e_idx, true_dec_idx, :, :, np.newaxis] * + ang_err_bw[true_e_idx, true_dec_idx, :, :, :], + axis=(-1, -2) + ) + + # Build the spline for this P(E_reco|E_nu). Weigh the pdf + # with the true neutrino energy probability (flux prob). + log10_reco_e_binedges = sm.log10_reco_e_binedges[ + true_e_idx, true_dec_idx] + + p = f_e * true_e_prob[idx] + + spline = FctSpline1D(p, log10_reco_e_binedges) + + return spline(xvals) + + # Integrate over the true neutrino energy and spline the output. + sum_pdf = np.sum([ + create_reco_e_pdf_for_true_e(i, true_e_idx) + for i,true_e_idx in enumerate(valid_true_e_idxs) + ], axis=0) + + spline = FctSpline1D(sum_pdf, xvals_binedges, norm=True) + + pdf = PDSignalEnergyPDF(spline) + + return pdf + + args_list = [ + ((sm_pdf, flux_model, gridfitparams), {}) + for gridfitparams in self.gridfitparams_list + ] + + pdf_list = parallelize( + create_energy_pdf, + args_list, + ncpu=self.ncpu, + ppbar=ppbar) + + del(sm_pdf) + + # Save all the energy PDF objects in the PDFSet PDF registry with + # the hash of the individual parameters as key. + for (gridfitparams, pdf) in zip(self.gridfitparams_list, pdf_list): + self.add_pdf(pdf, gridfitparams) + + def get_prob(self, tdm, gridfitparams, tl=None): + """Calculates the signal probability density of each event for the + given set of signal fit parameters on a grid. + + Parameters + ---------- + tdm : instance of TrialDataManager + The TrialDataManager instance holding the data events for which the + probability should be calculated for. The following data fields must + exist: + + - 'log_energy' + The log10 of the reconstructed energy. + - 'psi' + The opening angle from the source to the event in radians. + - 'ang_err' + The angular error of the event in radians. + gridfitparams : dict + The dictionary holding the signal parameter values for which the + signal energy probability should be calculated. Note, that the + parameter values must match a set of parameter grid values for which + a PDSignalPDF object has been created at construction time of this + PDSignalPDFSet object. + tl : TimeLord instance | None + The optional TimeLord instance that should be used to measure time. + + Returns + ------- + prob : 1d ndarray + The array with the signal energy probability for each event. + grads : (N_fitparams,N_events)-shaped ndarray | None + The 2D numpy ndarray holding the gradients of the PDF w.r.t. + each fit parameter for each event. The order of the gradients + is the same as the order of floating parameters specified through + the ``param_set`` property. + It is ``None``, if this PDF does not depend on any parameters. 
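For a power-law flux, the ``flux_prob`` normalization above reduces to ratios of analytic integrals. A standalone sketch with hypothetical bin edges; ``powerlaw_integral`` is a toy helper, not part of SkyLLH:

```python
import numpy as np

def powerlaw_integral(e_lo, e_hi, gamma):
    """Integral of E**-gamma from e_lo to e_hi (unnormalized, gamma != 1)."""
    g = 1.0 - gamma
    return (e_hi**g - e_lo**g) / g

# Hypothetical true-energy bin edges in GeV and a spectral index.
enu_edges = np.logspace(2.0, 9.0, 8)
gamma = 2.0

# Probability per bin: bin integral divided by the full-range integral.
flux_prob = powerlaw_integral(enu_edges[:-1], enu_edges[1:], gamma)
flux_prob /= powerlaw_integral(enu_edges[0], enu_edges[-1], gamma)
assert np.isclose(flux_prob.sum(), 1.0)
```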
+ + Raises + ------ + KeyError + If no energy PDF can be found for the given signal parameter values. + """ + pdf = self.get_pdf(gridfitparams) + + (prob, grads) = pdf.get_prob(tdm, tl=tl) + + return (prob, grads) diff --git a/skyllh/analyses/i3/publicdata_ps/smearing_matrix.py b/skyllh/analyses/i3/publicdata_ps/smearing_matrix.py new file mode 100644 index 0000000000..7b801855cd --- /dev/null +++ b/skyllh/analyses/i3/publicdata_ps/smearing_matrix.py @@ -0,0 +1,885 @@ +# -*- coding: utf-8 -*- + +import numpy as np + +from skyllh.core.storage import create_FileLoader + +def load_smearing_histogram(pathfilenames): + """Loads the 5D smearing histogram from the given data file. + + Parameters + ---------- + pathfilenames : str | list of str + The file name of the data file. + + Returns + ------- + histogram : 5d ndarray + The 5d histogram array holding the probability values of the smearing + matrix. + The axes are (true_e, true_dec, reco_e, psi, ang_err). + true_e_bin_edges : 1d ndarray + The ndarray holding the bin edges of the true energy axis. + true_dec_bin_edges : 1d ndarray + The ndarray holding the bin edges of the true declination axis in + radians. + reco_e_lower_edges : 3d ndarray + The 3d ndarray holding the lower bin edges of the reco energy axis. + For each pair of true_e and true_dec different reco energy bin edges + are provided. + The shape is (n_true_e, n_true_dec, n_reco_e). + reco_e_upper_edges : 3d ndarray + The 3d ndarray holding the upper bin edges of the reco energy axis. + For each pair of true_e and true_dec different reco energy bin edges + are provided. + The shape is (n_true_e, n_true_dec, n_reco_e). + psi_lower_edges : 4d ndarray + The 4d ndarray holding the lower bin edges of the psi axis in radians. + The shape is (n_true_e, n_true_dec, n_reco_e, n_psi). + psi_upper_edges : 4d ndarray + The 4d ndarray holding the upper bin edges of the psi axis in radians. + The shape is (n_true_e, n_true_dec, n_reco_e, n_psi). + ang_err_lower_edges : 5d ndarray + The 5d ndarray holding the lower bin edges of the angular error axis + in radians. + The shape is (n_true_e, n_true_dec, n_reco_e, n_psi, n_ang_err). + ang_err_upper_edges : 5d ndarray + The 5d ndarray holding the upper bin edges of the angular error axis + in radians. + The shape is (n_true_e, n_true_dec, n_reco_e, n_psi, n_ang_err). + """ + # Load the smearing data from the public dataset. + loader = create_FileLoader(pathfilenames=pathfilenames) + data = loader.load_data() + # Rename the data fields. + renaming_dict = { + 'log10(E_nu/GeV)_min': 'true_e_min', + 'log10(E_nu/GeV)_max': 'true_e_max', + 'Dec_nu_min[deg]': 'true_dec_min', + 'Dec_nu_max[deg]': 'true_dec_max', + 'log10(E/GeV)_min': 'e_min', + 'log10(E/GeV)_max': 'e_max', + 'PSF_min[deg]': 'psi_min', + 'PSF_max[deg]': 'psi_max', + 'AngErr_min[deg]': 'ang_err_min', + 'AngErr_max[deg]': 'ang_err_max', + 'Fractional_Counts': 'norm_counts' + } + data.rename_fields(renaming_dict) + + def _get_nbins_from_edges(lower_edges, upper_edges): + """Helper function to extract the number of bins from the data's + bin edges. + """ + n = 0 + # Select only valid rows. + mask = (upper_edges - lower_edges) > 0 + data = lower_edges[mask] + # Go through the valid rows and search for the number of increasing + # bin edge values. + v0 = None + for v in data: + if(v0 is not None and v < v0): + # Reached the end of the edges block. 
+ break + if(v0 is None or v > v0): + v0 = v + n += 1 + return n + + true_e_bin_edges = np.union1d( + data['true_e_min'], data['true_e_max']) + true_dec_bin_edges = np.union1d( + data['true_dec_min'], data['true_dec_max']) + + n_true_e = len(true_e_bin_edges) - 1 + n_true_dec = len(true_dec_bin_edges) - 1 + + n_reco_e = _get_nbins_from_edges( + data['e_min'], data['e_max']) + n_psi = _get_nbins_from_edges( + data['psi_min'], data['psi_max']) + n_ang_err = _get_nbins_from_edges( + data['ang_err_min'], data['ang_err_max']) + + # Get reco energy bin_edges as a 3d array. + idxs = np.array( + range(len(data)) + ) % (n_psi * n_ang_err) == 0 + + reco_e_lower_edges = np.reshape( + data['e_min'][idxs], + (n_true_e, n_true_dec, n_reco_e) + ) + reco_e_upper_edges = np.reshape( + data['e_max'][idxs], + (n_true_e, n_true_dec, n_reco_e) + ) + + # Get psi bin_edges as a 4d array. + idxs = np.array( + range(len(data)) + ) % n_ang_err == 0 + + psi_lower_edges = np.reshape( + data['psi_min'][idxs], + (n_true_e, n_true_dec, n_reco_e, n_psi) + ) + psi_upper_edges = np.reshape( + data['psi_max'][idxs], + (n_true_e, n_true_dec, n_reco_e, n_psi) + ) + + # Get angular error bin_edges as a 5d array. + ang_err_lower_edges = np.reshape( + data['ang_err_min'], + (n_true_e, n_true_dec, n_reco_e, n_psi, n_ang_err) + ) + ang_err_upper_edges = np.reshape( + data['ang_err_max'], + (n_true_e, n_true_dec, n_reco_e, n_psi, n_ang_err) + ) + + # Create 5D histogram for the probabilities. + histogram = np.reshape( + data['norm_counts'], + ( + n_true_e, + n_true_dec, + n_reco_e, + n_psi, + n_ang_err + ) + ) + + # Convert degrees into radians. + true_dec_bin_edges = np.radians(true_dec_bin_edges) + psi_lower_edges = np.radians(psi_lower_edges) + psi_upper_edges = np.radians(psi_upper_edges) + ang_err_lower_edges = np.radians(ang_err_lower_edges) + ang_err_upper_edges = np.radians(ang_err_upper_edges) + + return ( + histogram, + true_e_bin_edges, + true_dec_bin_edges, + reco_e_lower_edges, + reco_e_upper_edges, + psi_lower_edges, + psi_upper_edges, + ang_err_lower_edges, + ang_err_upper_edges + ) + + +class PDSmearingMatrix(object): + """This class is a helper class for dealing with the smearing matrix + provided by the public data. + """ + def __init__( + self, pathfilenames, **kwargs): + """Creates a smearing matrix instance by loading the smearing matrix + from the given file. + """ + super().__init__(**kwargs) + + ( + self.histogram, + self._true_e_bin_edges, + self._true_dec_bin_edges, + self.reco_e_lower_edges, + self.reco_e_upper_edges, + self.psi_lower_edges, + self.psi_upper_edges, + self.ang_err_lower_edges, + self.ang_err_upper_edges + ) = load_smearing_histogram(pathfilenames) + + self.n_psi_bins = self.histogram.shape[3] + self.n_ang_err_bins = self.histogram.shape[4] + + # Create bin edges array for log10_reco_e. + s = np.array(self.reco_e_lower_edges.shape) + s[-1] += 1 + self.log10_reco_e_binedges = np.empty(s, dtype=np.double) + self.log10_reco_e_binedges[...,:-1] = self.reco_e_lower_edges + self.log10_reco_e_binedges[...,-1] = self.reco_e_upper_edges[...,-1] + + # Create bin edges array for psi. + s = np.array(self.psi_lower_edges.shape) + s[-1] += 1 + self.psi_binedges = np.empty(s, dtype=np.double) + self.psi_binedges[...,:-1] = self.psi_lower_edges + self.psi_binedges[...,-1] = self.psi_upper_edges[...,-1] + + # Create bin edges array for ang_err. 
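A toy illustration of how ``load_smearing_histogram`` turns the flattened per-row edge columns of the data file back into axis arrays, using hypothetical numbers (2 true-energy bins, 1 true-declination bin, 3 reco-energy bins):

```python
import numpy as np

# Flattened edge columns as they would appear in the file for this layout.
true_e_min = np.repeat([2.0, 3.0], 3)
true_e_max = np.repeat([3.0, 4.0], 3)
e_min = np.tile([1.0, 2.0, 3.0], 2)
e_max = np.tile([2.0, 3.0, 4.0], 2)

# Shared axes: merge the min/max columns into one edge array.
true_e_bin_edges = np.union1d(true_e_min, true_e_max)  # [2., 3., 4.]
n_true_e = len(true_e_bin_edges) - 1                   # 2

# Per-block axes: reshape the flattened columns into
# (n_true_e, n_true_dec, n_reco_e).
reco_e_lower_edges = e_min.reshape(n_true_e, 1, 3)
reco_e_upper_edges = e_max.reshape(n_true_e, 1, 3)
```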
+        s = np.array(self.ang_err_lower_edges.shape)
+        s[-1] += 1
+        self.ang_err_binedges = np.empty(s, dtype=np.double)
+        self.ang_err_binedges[...,:-1] = self.ang_err_lower_edges
+        self.ang_err_binedges[...,-1] = self.ang_err_upper_edges[...,-1]
+
+    @property
+    def n_log10_true_e_bins(self):
+        """(read-only) The number of log10 true energy bins.
+        """
+        return len(self._true_e_bin_edges) - 1
+
+    @property
+    def true_e_bin_edges(self):
+        """(read-only) The (n_true_e+1,)-shaped 1D numpy ndarray holding the
+        bin edges of the true energy.
+
+        Deprecated! Use log10_true_enu_binedges instead!
+        """
+        return self._true_e_bin_edges
+
+    @property
+    def true_e_bin_centers(self):
+        """(read-only) The (n_true_e,)-shaped 1D numpy ndarray holding the bin
+        center values of the true energy.
+        """
+        return 0.5*(self._true_e_bin_edges[:-1] +
+                    self._true_e_bin_edges[1:])
+
+    @property
+    def log10_true_enu_binedges(self):
+        """(read-only) The (n_log10_true_enu+1,)-shaped 1D numpy ndarray
+        holding the bin edges of the log10 true neutrino energy.
+        """
+        return self._true_e_bin_edges
+
+    @property
+    def n_true_dec_bins(self):
+        """(read-only) The number of true declination bins.
+        """
+        return len(self._true_dec_bin_edges) - 1
+
+    @property
+    def true_dec_bin_edges(self):
+        """(read-only) The (n_true_dec+1,)-shaped 1D numpy ndarray holding the
+        bin edges of the true declination.
+        """
+        return self._true_dec_bin_edges
+
+    @property
+    def true_dec_bin_centers(self):
+        """(read-only) The (n_true_dec,)-shaped 1D ndarray holding the bin
+        center values of the true declination.
+        """
+        return 0.5*(self._true_dec_bin_edges[:-1] +
+                    self._true_dec_bin_edges[1:])
+
+    @property
+    def log10_reco_e_binedges_lower(self):
+        """(read-only) The lower bin edges of the log10 reco energy axes.
+        """
+        return self.reco_e_lower_edges
+
+    @property
+    def log10_reco_e_binedges_upper(self):
+        """(read-only) The upper bin edges of the log10 reco energy axes.
+        """
+        return self.reco_e_upper_edges
+
+    @property
+    def min_log10_reco_e(self):
+        """(read-only) The minimum value of the reconstructed energy axis.
+        """
+        # Select only valid reco energy bins with bin widths greater than zero.
+        m = (self.reco_e_upper_edges - self.reco_e_lower_edges) > 0
+        return np.min(self.reco_e_lower_edges[m])
+
+    @property
+    def max_log10_reco_e(self):
+        """(read-only) The maximum value of the reconstructed energy axis.
+        """
+        # Select only valid reco energy bins with bin widths greater than zero.
+        m = (self.reco_e_upper_edges - self.reco_e_lower_edges) > 0
+        return np.max(self.reco_e_upper_edges[m])
+
+    @property
+    def min_log10_psi(self):
+        """(read-only) The minimum log10 value of the psi axis.
+        """
+        # Select only valid psi bins with bin widths greater than zero.
+        m = (self.psi_upper_edges - self.psi_lower_edges) > 0
+        return np.min(np.log10(self.psi_lower_edges[m]))
+
+    @property
+    def max_log10_psi(self):
+        """(read-only) The maximum log10 value of the psi axis.
+        """
+        # Select only valid psi bins with bin widths greater than zero.
+        m = (self.psi_upper_edges - self.psi_lower_edges) > 0
+        return np.max(np.log10(self.psi_upper_edges[m]))
+
+    @property
+    def pdf(self):
+        """(read-only) The probability-density-function
+        P(E_reco,psi,ang_err|E_nu,dec_nu), which, by definition, is the
+        histogram property divided by the 3D bin volumes for E_reco, psi, and
+        ang_err.
+ """ + log10_reco_e_bw = self.reco_e_upper_edges - self.reco_e_lower_edges + psi_bw = self.psi_upper_edges - self.psi_lower_edges + ang_err_bw = self.ang_err_upper_edges - self.ang_err_lower_edges + + bin_volumes = ( + log10_reco_e_bw[ + :, :, :, np.newaxis, np.newaxis + ] * + psi_bw[ + :, :, :, :, np.newaxis + ] * + ang_err_bw[ + :, :, :, :, : + ] + ) + + # Divide the histogram bin probability values by their bin volume. + # We do this only where the histogram actually has non-zero entries. + pdf = np.copy(self.histogram) + m = self.histogram != 0 + pdf[m] /= bin_volumes[m] + + return pdf + + def get_true_dec_idx(self, true_dec): + """Returns the true declination index for the given true declination + value. + + Parameters + ---------- + true_dec : float + The true declination value in radians. + + Returns + ------- + true_dec_idx : int + The index of the declination bin for the given declination value. + """ + if (true_dec < self.true_dec_bin_edges[0]) or\ + (true_dec > self.true_dec_bin_edges[-1]): + raise ValueError('The declination {} degrees is not supported by ' + 'the smearing matrix!'.format(true_dec)) + + true_dec_idx = np.digitize(true_dec, self.true_dec_bin_edges) - 1 + + return true_dec_idx + + def get_log10_true_e_idx(self, log10_true_e): + """Returns the bin index for the given true log10 energy value. + + Parameters + ---------- + log10_true_e : float + The log10 value of the true energy. + + Returns + ------- + log10_true_e_idx : int + The index of the true log10 energy bin for the given log10 true + energy value. + """ + if (log10_true_e < self.true_e_bin_edges[0]) or\ + (log10_true_e > self.true_e_bin_edges[-1]): + raise ValueError( + 'The log10 true energy value {} is not supported by the ' + 'smearing matrix!'.format(log10_true_e)) + + log10_true_e_idx = np.digitize( + log10_true_e, self._true_e_bin_edges) - 1 + + return log10_true_e_idx + + def get_reco_e_idx(self, true_e_idx, true_dec_idx, reco_e): + """Returns the bin index for the given reco energy value given the + given true energy and true declination bin indices. + + Parameters + ---------- + true_e_idx : int + The index of the true energy bin. + true_dec_idx : int + The index of the true declination bin. + reco_e : float + The reco energy value for which the bin index should get returned. + + Returns + ------- + reco_e_idx : int | None + The index of the reco energy bin the given reco energy value falls + into. It returns None if the value is out of range. + """ + lower_edges = self.reco_e_lower_edges[true_e_idx,true_dec_idx] + upper_edges = self.reco_e_upper_edges[true_e_idx,true_dec_idx] + + m = (lower_edges <= reco_e) & (upper_edges > reco_e) + idxs = np.nonzero(m)[0] + if(len(idxs) == 0): + return None + + reco_e_idx = idxs[0] + + return reco_e_idx + + def get_psi_idx(self, true_e_idx, true_dec_idx, reco_e_idx, psi): + """Returns the bin index for the given psi value given the + true energy, true declination and reco energy bin indices. + + Parameters + ---------- + true_e_idx : int + The index of the true energy bin. + true_dec_idx : int + The index of the true declination bin. + reco_e_idx : int + The index of the reco energy bin. + psi : float + The psi value in radians for which the bin index should get + returned. + + Returns + ------- + psi_idx : int | None + The index of the psi bin the given psi value falls into. + It returns None if the value is out of range. 
+ """ + lower_edges = self.psi_lower_edges[true_e_idx,true_dec_idx,reco_e_idx] + upper_edges = self.psi_upper_edges[true_e_idx,true_dec_idx,reco_e_idx] + + m = (lower_edges <= psi) & (upper_edges > psi) + idxs = np.nonzero(m)[0] + if(len(idxs) == 0): + return None + + psi_idx = idxs[0] + + return psi_idx + + def get_ang_err_idx( + self, true_e_idx, true_dec_idx, reco_e_idx, psi_idx, ang_err): + """Returns the bin index for the given angular error value given the + true energy, true declination, reco energy, and psi bin indices. + + Parameters + ---------- + true_e_idx : int + The index of the true energy bin. + true_dec_idx : int + The index of the true declination bin. + reco_e_idx : int + The index of the reco energy bin. + psi_idx : int + The index of the psi bin. + ang_err : float + The angular error value in radians for which the bin index should + get returned. + + Returns + ------- + ang_err_idx : int | None + The index of the angular error bin the given angular error value + falls into. It returns None if the value is out of range. + """ + lower_edges = self.ang_err_lower_edges[ + true_e_idx,true_dec_idx,reco_e_idx,psi_idx] + upper_edges = self.ang_err_upper_edges[ + true_e_idx,true_dec_idx,reco_e_idx,psi_idx] + + m = (lower_edges <= ang_err) & (upper_edges > ang_err) + idxs = np.nonzero(m)[0] + if(len(idxs) == 0): + return None + + ang_err_idx = idxs[0] + + return ang_err_idx + + def get_true_log_e_range_with_valid_log_e_pdfs(self, dec_idx): + """Determines the true log energy range for which log_e PDFs are + available for the given declination bin. + + Parameters + ---------- + dec_idx : int + The declination bin index. + + Returns + ------- + min_log_true_e : float + The minimum true log energy value. + max_log_true_e : float + The maximum true log energy value. + """ + m = np.sum( + (self.reco_e_upper_edges[:,dec_idx] - + self.reco_e_lower_edges[:,dec_idx] > 0), + axis=1) != 0 + min_log_true_e = np.min(self.true_e_bin_edges[:-1][m]) + max_log_true_e = np.max(self.true_e_bin_edges[1:][m]) + + return (min_log_true_e, max_log_true_e) + + def get_log_e_pdf( + self, log_true_e_idx, dec_idx): + """Retrieves the log_e PDF from the given true energy bin index and + source bin index. + Returns (None, None, None, None) if any of the bin indices are less then + zero, or if the sum of all pdf bins is zero. + + Parameters + ---------- + log_true_e_idx : int + The index of the true energy bin. + dec_idx : int + The index of the declination bin. + + Returns + ------- + pdf : 1d ndarray + The log_e pdf values. + lower_bin_edges : 1d ndarray + The lower bin edges of the energy pdf histogram. + upper_bin_edges : 1d ndarray + The upper bin edges of the energy pdf histogram. + bin_widths : 1d ndarray + The bin widths of the energy pdf histogram. + """ + if log_true_e_idx < 0 or dec_idx < 0: + return (None, None, None, None) + + pdf = self.histogram[log_true_e_idx, dec_idx] + pdf = np.sum(pdf, axis=(-2, -1)) + + if np.sum(pdf) == 0: + return (None, None, None, None) + + # Get the reco energy bin edges and widths. + lower_bin_edges = self.reco_e_lower_edges[ + log_true_e_idx, dec_idx + ] + upper_bin_edges = self.reco_e_upper_edges[ + log_true_e_idx, dec_idx + ] + bin_widths = upper_bin_edges - lower_bin_edges + + # Normalize the PDF. 
+ pdf /= np.sum(pdf) * bin_widths + + return (pdf, lower_bin_edges, upper_bin_edges, bin_widths) + + def get_psi_pdf( + self, log_true_e_idx, dec_idx, log_e_idx): + """Retrieves the psi PDF from the given true energy bin index, the + source bin index, and the log_e bin index. + Returns (None, None, None, None) if any of the bin indices are less then + zero, or if the sum of all pdf bins is zero. + + Parameters + ---------- + log_true_e_idx : int + The index of the true energy bin. + dec_idx : int + The index of the declination bin. + log_e_idx : int + The index of the log_e bin. + + Returns + ------- + pdf : 1d ndarray + The psi pdf values. + lower_bin_edges : 1d ndarray + The lower bin edges of the psi pdf histogram. + upper_bin_edges : 1d ndarray + The upper bin edges of the psi pdf histogram. + bin_widths : 1d ndarray + The bin widths of the psi pdf histogram. + """ + if log_true_e_idx < 0 or dec_idx < 0 or log_e_idx < 0: + return (None, None, None, None) + + pdf = self.histogram[log_true_e_idx, dec_idx, log_e_idx] + pdf = np.sum(pdf, axis=-1) + + if np.sum(pdf) == 0: + return (None, None, None, None) + + # Get the PSI bin edges and widths. + lower_bin_edges = self.psi_lower_edges[ + log_true_e_idx, dec_idx, log_e_idx + ] + upper_bin_edges = self.psi_upper_edges[ + log_true_e_idx, dec_idx, log_e_idx + ] + bin_widths = upper_bin_edges - lower_bin_edges + + # Normalize the PDF. + pdf /= np.sum(pdf) * bin_widths + + return (pdf, lower_bin_edges, upper_bin_edges, bin_widths) + + def get_ang_err_pdf( + self, log_true_e_idx, dec_idx, log_e_idx, psi_idx): + """Retrieves the angular error PDF from the given true energy bin index, + the source bin index, the log_e bin index, and the psi bin index. + Returns (None, None, None, None) if any of the bin indices are less then + zero, or if the sum of all pdf bins is zero. + + Parameters + ---------- + log_true_e_idx : int + The index of the true energy bin. + dec_idx : int + The index of the declination bin. + log_e_idx : int + The index of the log_e bin. + psi_idx : int + The index of the psi bin. + + Returns + ------- + pdf : 1d ndarray + The ang_err pdf values. + lower_bin_edges : 1d ndarray + The lower bin edges of the ang_err pdf histogram. + upper_bin_edges : 1d ndarray + The upper bin edges of the ang_err pdf histogram. + bin_widths : 1d ndarray + The bin widths of the ang_err pdf histogram. + """ + if log_true_e_idx < 0 or dec_idx < 0 or log_e_idx < 0 or psi_idx < 0: + return (None, None, None, None) + + pdf = self.histogram[log_true_e_idx, dec_idx, log_e_idx, psi_idx] + + if np.sum(pdf) == 0: + return (None, None, None, None) + + # Get the ang_err bin edges and widths. + lower_bin_edges = self.ang_err_lower_edges[ + log_true_e_idx, dec_idx, log_e_idx, psi_idx + ] + upper_bin_edges = self.ang_err_upper_edges[ + log_true_e_idx, dec_idx, log_e_idx, psi_idx + ] + bin_widths = upper_bin_edges - lower_bin_edges + + # Some bins might not be defined, i.e. have zero bin widths. + valid = bin_widths > 0 + + pdf = pdf[valid] + lower_bin_edges = lower_bin_edges[valid] + upper_bin_edges = upper_bin_edges[valid] + bin_widths = bin_widths[valid] + + # Normalize the PDF. + pdf = pdf / (np.sum(pdf) * bin_widths) + + return (pdf, lower_bin_edges, upper_bin_edges, bin_widths) + + def sample_log_e( + self, rss, dec_idx, log_true_e_idxs): + """Samples log energy values for the given source declination and true + energy bins. 
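The sampling methods that follow all use the same two-step scheme: draw a bin index with probability ``pdf * bin_width``, then draw a uniform value within that bin. A self-contained 1D sketch with a hypothetical binned PDF:

```python
import numpy as np

rng = np.random.default_rng(1)

# Hypothetical 1D binned PDF (density values) with its bin edges.
lower = np.array([2.0, 3.0, 4.0])
upper = np.array([3.0, 4.0, 5.0])
widths = upper - lower
pdf = np.array([0.2, 0.5, 0.3]) / widths  # density; pdf*widths sums to 1

# Draw a bin index with probability pdf*width, then a uniform value in it.
idx = rng.choice(np.arange(len(pdf)), p=pdf * widths, size=1000)
samples = rng.uniform(lower[idx], upper[idx])
```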
+ + Parameters + ---------- + rss : instance of RandomStateService + The RandomStateService which should be used for drawing random + numbers from. + dec_idx : int + The index of the source declination bin. + log_true_e_idxs : 1d ndarray of int + The bin indices of the true energy bins. + + Returns + ------- + log_e_idx : 1d ndarray of int + The bin indices of the log_e pdf corresponding to the sampled + log_e values. + log_e : 1d ndarray of float + The sampled log_e values. + """ + n_evt = len(log_true_e_idxs) + log_e_idx = np.empty((n_evt,), dtype=np.int_) + log_e = np.empty((n_evt,), dtype=np.double) + + unique_log_true_e_idxs = np.unique(log_true_e_idxs) + for b_log_true_e_idx in unique_log_true_e_idxs: + m = log_true_e_idxs == b_log_true_e_idx + b_size = np.count_nonzero(m) + ( + pdf, + low_bin_edges, + up_bin_edges, + bin_widths + ) = self.get_log_e_pdf( + b_log_true_e_idx, + dec_idx) + + if pdf is None: + log_e_idx[m] = -1 + log_e[m] = np.nan + continue + + b_log_e_idx = rss.random.choice( + np.arange(len(pdf)), + p=(pdf * bin_widths), + size=b_size) + b_log_e = rss.random.uniform( + low_bin_edges[b_log_e_idx], + up_bin_edges[b_log_e_idx], + size=b_size) + + log_e_idx[m] = b_log_e_idx + log_e[m] = b_log_e + + return (log_e_idx, log_e) + + def sample_psi( + self, rss, dec_idx, log_true_e_idxs, log_e_idxs): + """Samples psi values for the given source declination, true + energy bins, and log_e bins. + + Parameters + ---------- + rss : instance of RandomStateService + The RandomStateService which should be used for drawing random + numbers from. + dec_idx : int + The index of the source declination bin. + log_true_e_idxs : 1d ndarray of int + The bin indices of the true energy bins. + log_e_idxs : 1d ndarray of int + The bin indices of the log_e bins. + + Returns + ------- + psi_idx : 1d ndarray of int + The bin indices of the psi pdf corresponding to the sampled psi + values. + psi : 1d ndarray of float + The sampled psi values in radians. + """ + if(len(log_true_e_idxs) != len(log_e_idxs)): + raise ValueError( + 'The lengths of log_true_e_idxs and log_e_idxs must be equal!') + + n_evt = len(log_true_e_idxs) + psi_idx = np.empty((n_evt,), dtype=np.int_) + psi = np.empty((n_evt,), dtype=np.double) + + unique_log_true_e_idxs = np.unique(log_true_e_idxs) + for b_log_true_e_idx in unique_log_true_e_idxs: + m = log_true_e_idxs == b_log_true_e_idx + bb_unique_log_e_idxs = np.unique(log_e_idxs[m]) + for bb_log_e_idx in bb_unique_log_e_idxs: + mm = m & (log_e_idxs == bb_log_e_idx) + bb_size = np.count_nonzero(mm) + ( + pdf, + low_bin_edges, + up_bin_edges, + bin_widths + ) = self.get_psi_pdf( + b_log_true_e_idx, + dec_idx, + bb_log_e_idx) + + if pdf is None: + psi_idx[mm] = -1 + psi[mm] = np.nan + continue + + bb_psi_idx = rss.random.choice( + np.arange(len(pdf)), + p=(pdf * bin_widths), + size=bb_size) + bb_psi = rss.random.uniform( + low_bin_edges[bb_psi_idx], + up_bin_edges[bb_psi_idx], + size=bb_size) + + psi_idx[mm] = bb_psi_idx + psi[mm] = bb_psi + + return (psi_idx, psi) + + def sample_ang_err( + self, rss, dec_idx, log_true_e_idxs, log_e_idxs, psi_idxs): + """Samples ang_err values for the given source declination, true + energy bins, log_e bins, and psi bins. + + Parameters + ---------- + rss : instance of RandomStateService + The RandomStateService which should be used for drawing random + numbers from. + dec_idx : int + The index of the source declination bin. + log_true_e_idxs : 1d ndarray of int + The bin indices of the true energy bins. 
+ log_e_idxs : 1d ndarray of int + The bin indices of the log_e bins. + psi_idxs : 1d ndarray of int + The bin indices of the psi bins. + + Returns + ------- + ang_err_idx : 1d ndarray of int + The bin indices of the angular error pdf corresponding to the + sampled angular error values. + ang_err : 1d ndarray of float + The sampled angular error values in radians. + """ + if (len(log_true_e_idxs) != len(log_e_idxs)) and\ + (len(log_e_idxs) != len(psi_idxs)): + raise ValueError( + 'The lengths of log_true_e_idxs, log_e_idxs, and psi_idxs must ' + 'be equal!') + + n_evt = len(log_true_e_idxs) + ang_err_idx = np.empty((n_evt,), dtype=np.int_) + ang_err = np.empty((n_evt,), dtype=np.double) + + unique_log_true_e_idxs = np.unique(log_true_e_idxs) + for b_log_true_e_idx in unique_log_true_e_idxs: + m = log_true_e_idxs == b_log_true_e_idx + bb_unique_log_e_idxs = np.unique(log_e_idxs[m]) + for bb_log_e_idx in bb_unique_log_e_idxs: + mm = m & (log_e_idxs == bb_log_e_idx) + bbb_unique_psi_idxs = np.unique(psi_idxs[mm]) + for bbb_psi_idx in bbb_unique_psi_idxs: + mmm = mm & (psi_idxs == bbb_psi_idx) + bbb_size = np.count_nonzero(mmm) + ( + pdf, + low_bin_edges, + up_bin_edges, + bin_widths + ) = self.get_ang_err_pdf( + b_log_true_e_idx, + dec_idx, + bb_log_e_idx, + bbb_psi_idx) + + if pdf is None: + ang_err_idx[mmm] = -1 + ang_err[mmm] = np.nan + continue + + bbb_ang_err_idx = rss.random.choice( + np.arange(len(pdf)), + p=(pdf * bin_widths), + size=bbb_size) + bbb_ang_err = rss.random.uniform( + low_bin_edges[bbb_ang_err_idx], + up_bin_edges[bbb_ang_err_idx], + size=bbb_size) + + ang_err_idx[mmm] = bbb_ang_err_idx + ang_err[mmm] = bbb_ang_err + + return (ang_err_idx, ang_err) diff --git a/skyllh/analyses/i3/publicdata_ps/time_dependent_ps.py b/skyllh/analyses/i3/publicdata_ps/time_dependent_ps.py new file mode 100644 index 0000000000..a6e6daabcd --- /dev/null +++ b/skyllh/analyses/i3/publicdata_ps/time_dependent_ps.py @@ -0,0 +1,537 @@ +# -*- coding: utf-8 -*- + +"""Setup the time-dependent analysis. For now this works on a single dataset. +""" + +import argparse +import logging +import numpy as np + +from skyllh.core.progressbar import ProgressBar + +# Classes to define the source hypothesis. +from skyllh.physics.source import PointLikeSource +from skyllh.physics.flux import PowerLawFlux +from skyllh.core.source_hypo_group import SourceHypoGroup +from skyllh.core.source_hypothesis import SourceHypoGroupManager + +# Classes to define the fit parameters. +from skyllh.core.parameters import ( + SingleSourceFitParameterMapper, + FitParameter +) + +# Classes for the minimizer. +from skyllh.core.minimizer import Minimizer, LBFGSMinimizerImpl +from skyllh.core.minimizers.iminuit import IMinuitMinimizerImpl + +# Classes for utility functionality. +from skyllh.core.config import CFG +from skyllh.core.random import RandomStateService +from skyllh.core.optimize import SpatialBoxEventSelectionMethod +from skyllh.core.smoothing import BlockSmoothingFilter +from skyllh.core.timing import TimeLord +from skyllh.core.trialdata import TrialDataManager + +# Classes for defining the analysis. +from skyllh.core.test_statistic import TestStatisticWilks +from skyllh.core.analysis import ( + TimeIntegratedMultiDatasetSingleSourceAnalysis, +) + +# Classes to define the background generation. 
+from skyllh.core.scrambling import DataScrambler +from skyllh.i3.scrambling import I3SeasonalVariationTimeScramblingMethod +from skyllh.i3.background_generation import FixedScrambledExpDataI3BkgGenMethod + +# Classes to define the signal and background PDFs. +from skyllh.core.signalpdf import ( + RayleighPSFPointSourceSignalSpatialPDF, + SignalBoxTimePDF, + SignalGaussTimePDF +) +from skyllh.core.backgroundpdf import BackgroundUniformTimePDF +from skyllh.i3.backgroundpdf import ( + DataBackgroundI3SpatialPDF +) +from skyllh.core.pdf import TimePDF + +# Classes to define the spatial and energy PDF ratios. +from skyllh.core.pdfratio import ( + SpatialSigOverBkgPDFRatio, + SigOverBkgPDFRatio +) + +# Analysis utilities. +from skyllh.core.analysis_utils import ( + pointlikesource_to_data_field_array +) + +from skyllh.core.expectation_maximization import em_fit + +# Analysis specific classes for working with the public data. +from skyllh.analyses.i3.publicdata_ps.signal_generator import ( + PDTimeDependentSignalGenerator +) +from skyllh.analyses.i3.publicdata_ps.detsigyield import ( + PublicDataPowerLawFluxPointLikeSourceI3DetSigYieldImplMethod +) +from skyllh.analyses.i3.publicdata_ps.signalpdf import ( + PDSignalEnergyPDFSet +) +from skyllh.analyses.i3.publicdata_ps.pdfratio import ( + PDPDFRatio +) +from skyllh.analyses.i3.publicdata_ps.backgroundpdf import ( + PDDataBackgroundI3EnergyPDF +) +from skyllh.analyses.i3.publicdata_ps.utils import ( + create_energy_cut_spline, +) +from skyllh.analyses.i3.publicdata_ps.time_integrated_ps import ( + psi_func, +) + + +def change_time_pdf(analysis, gauss=None, box=None): + """Changes the time pdf. + + Parameters + ---------- + gauss : dict | None + None or dictionary with {"mu": float, "sigma": float}. + box : dict | None + None or dictionary with {"start": float, "end": float}. + """ + + if gauss is None and box is None: + raise TypeError("Either gauss or box have to be specified as time pdf.") + + grl = analysis._data_list[0].grl + # redo this in case the background pdf was not calculated before + time_bkgpdf = BackgroundUniformTimePDF(grl) + if gauss is not None: + time_sigpdf = SignalGaussTimePDF(grl, gauss['mu'], gauss['sigma']) + elif box is not None: + time_sigpdf = SignalBoxTimePDF(grl, box["start"], box["end"]) + + time_pdfratio = SigOverBkgPDFRatio( + sig_pdf=time_sigpdf, + bkg_pdf=time_bkgpdf, + pdf_type=TimePDF + ) + + # the next line seems to make no difference in the llh evaluation. We keep it for consistency + analysis._llhratio.llhratio_list[0].pdfratio_list[2] = time_pdfratio + # this line here is relevant for the llh evaluation + analysis._llhratio.llhratio_list[0]._pdfratioarray._pdfratio_list[2] = time_pdfratio + + # change detector signal yield with flare livetime in sample (1 / grl_norm in pdf), + # rebuild the histograms if it is changed... + + +def get_energy_spatial_signal_over_background(analysis, fitparams): + """Returns the signal over background ratio for + (spatial_signal * energy_signal) / (spatial_background * energy_background). + + Parameter + --------- + fitparams : dict + Dictionary with {"gamma": float} for energy pdf. + + Returns + ------- + ratio : 1d ndarray + Product of spatial and energy signal over background pdfs. 
+ """ + + ratio = analysis._llhratio.llhratio_list[0].pdfratio_list[0].get_ratio(analysis._tdm_list[0], fitparams) + ratio *= analysis._llhratio.llhratio_list[0].pdfratio_list[1].get_ratio(analysis._tdm_list[0], fitparams) + + return ratio + + +def change_fluxmodel_gamma(analysis, gamma): + """Set new gamma for the flux model. + + Parameter + --------- + gamma : float + Spectral index for flux model. + """ + + analysis.src_hypo_group_manager.src_hypo_group_list[0].fluxmodel.gamma = gamma + + +def change_signal_time(analysis, gauss=None, box=None): + """Change the signal injection to gauss or box. + + Parameters + ---------- + gauss : dict | None + None or dictionary {"mu": float, "sigma": float}. + box : dict | None + None or dictionary {"start" : float, "end" : float}. + """ + + analysis.sig_generator.set_flare(box=box, gauss=gauss) + + +def calculate_TS(analysis, em_results, rss): + """Calculate the best TS value for the expectation maximization gamma scan. + + Parameters + ---------- + em_results : 1d ndarray of tuples + Gamma scan result. + rss : instance of RandomStateService + The instance of RandomStateService that should be used to generate + random numbers from. + + Returns + ------- + float maximized TS value + tuple(gamma from em scan [float], best fit mean time [float], best fit width [float]) + (float ns, float gamma) fitparams from TS optimization + """ + + max_TS = 0 + best_time = None + best_fitparams = None + for index, result in enumerate(em_results): + change_time_pdf(analysis, gauss={"mu": result["mu"], "sigma": result["sigma"]}) + (fitparamset, log_lambda_max, fitparam_values, status) = analysis.maximize_llhratio(rss) + TS = analysis.calculate_test_statistic(log_lambda_max, fitparam_values) + if TS > max_TS: + max_TS = TS + best_time = result + best_fitparams = fitparam_values + + return max_TS, best_time, best_fitparams + + +def run_gamma_scan_single_flare(analysis, remove_time=None, gamma_min=1, gamma_max=5, n_gamma=51): + """Run em for different gammas in the signal energy pdf + + Parameters + ---------- + remove_time : float + Time information of event that should be removed. + gamma_min : float + Lower bound for gamma scan. + gamma_max : float + Upper bound for gamma scan. + n_gamma : int + Number of steps for gamma scan. + + Returns + ------- + array with "gamma", "mu", "sigma", and scaling factor for flare "ns_em" + """ + dtype = [("gamma", "f8"), ("mu", "f8"), ("sigma", "f8"), ("ns_em", "f8")] + results = np.empty(n_gamma, dtype=dtype) + + time = analysis._tdm_list[0].get_data("time") + + for index, g in enumerate(np.linspace(gamma_min, gamma_max, n_gamma)): + ratio = get_energy_spatial_signal_over_background(analysis, {"gamma": g}) + mu, sigma, ns = em_fit(time, ratio, n=1, tol=1.e-200, iter_max=500, weight_thresh=0, + initial_width=5000, remove_x=remove_time) + results[index] = (g, mu[0], sigma[0], ns[0]) + + return results + + +def unblind_flare(analysis, remove_time=None): + """Run EM on unscrambled data. Similar to the original analysis, remove the alert event. + + Parameters + ---------- + remove_time : float + Time information of event that should be removed. 
+        In the case of the TXS analysis: remove_time=58018.8711856.
+
+    Returns
+    -------
+    results : 1d structured ndarray
+        Array with the fields "gamma", "mu", "sigma", and the scaling factor
+        for the flare "ns_em".
+    """
+    # Get the original unblinded data.
+    rss = RandomStateService(seed=1)
+    analysis.unblind(rss)
+    time_results = run_gamma_scan_single_flare(
+        analysis, remove_time=remove_time)
+    return time_results
+
+
+def create_analysis(
+    datasets,
+    source,
+    gauss=None,
+    box=None,
+    refplflux_Phi0=1,
+    refplflux_E0=1e3,
+    refplflux_gamma=2.0,
+    ns_seed=100.0,
+    ns_min=0.,
+    ns_max=1e3,
+    gamma_seed=3.0,
+    gamma_min=1.,
+    gamma_max=5.,
+    kde_smoothing=False,
+    minimizer_impl="LBFGS",
+    cut_sindec=None,
+    spl_smooth=None,
+    cap_ratio=False,
+    compress_data=False,
+    keep_data_fields=None,
+    optimize_delta_angle=10,
+    tl=None,
+    ppbar=None
+):
+    """Creates the Analysis instance for this particular analysis.
+
+    Parameters:
+    -----------
+    datasets : list of Dataset instances
+        The list of Dataset instances, which should be used in the
+        analysis.
+    source : PointLikeSource instance
+        The PointLikeSource instance defining the point source position.
+    gauss : dict | None
+        None for no Gaussian time pdf, else a dictionary with
+        {"mu": float, "sigma": float} of the Gaussian.
+    box : dict | None
+        None for no box-shaped time pdf, else a dictionary with
+        {"start": float, "end": float} of the box.
+    refplflux_Phi0 : float
+        The flux normalization to use for the reference power law flux model.
+    refplflux_E0 : float
+        The reference energy to use for the reference power law flux model.
+    refplflux_gamma : float
+        The spectral index to use for the reference power law flux model.
+    ns_seed : float
+        Value to seed the minimizer with for the ns fit.
+    ns_min : float
+        Lower bound for the ns fit.
+    ns_max : float
+        Upper bound for the ns fit.
+    gamma_seed : float | None
+        Value to seed the minimizer with for the gamma fit. If set to None,
+        the refplflux_gamma value will be set as gamma_seed.
+    gamma_min : float
+        Lower bound for the gamma fit.
+    gamma_max : float
+        Upper bound for the gamma fit.
+    kde_smoothing : bool
+        Apply a KDE-based smoothing to the data-driven background pdf.
+        Default: False.
+    minimizer_impl : str | "LBFGS"
+        Minimizer implementation to be used. Supported options are "LBFGS"
+        (L-BFGS minimizer used from the :mod:`scipy.optimize` module), or
+        "minuit" (Minuit minimizer used by the :mod:`iminuit` module).
+        Default: "LBFGS".
+    cut_sindec : list of float | None
+        sin(dec) values at which the energy cut in the southern sky should
+        start. If None, np.sin(np.radians([-2, 0, -3, 0, 0])) is used.
+    spl_smooth : list of float | None
+        Smoothing parameters for the 1D spline for the energy cut. If None,
+        [0., 0.005, 0.05, 0.2, 0.3] is used.
+    cap_ratio : bool
+        If set to True, the energy PDF ratio will be capped to a finite value
+        where no background energy PDF information is available. This will
+        ensure that an energy PDF ratio is available for high energies where
+        no background is available from the experimental data.
+        If kde_smoothing is set to True, cap_ratio should be set to False!
+        Default is False.
+    compress_data : bool
+        Flag if the data should get converted from float64 into float32.
+    keep_data_fields : list of str | None
+        List of additional data field names that should get kept when loading
+        the data.
+    optimize_delta_angle : float
+        The delta angle in degrees for the event selection optimization
+        methods.
+    tl : TimeLord instance | None
+        The TimeLord instance to use to time the creation of the analysis.
+    ppbar : ProgressBar instance | None
+        The instance of ProgressBar for the optional parent progress bar.
+
+    Returns
+    -------
+    analysis : TimeIntegratedMultiDatasetSingleSourceAnalysis
+        The Analysis instance for this analysis.
+    """
+    if len(datasets) != 1:
+        raise RuntimeError(
+            'This analysis supports only a single dataset at the moment!')
+
+    if gauss is None and box is None:
+        raise ValueError("No time pdf specified (box or gauss).")
+    if gauss is not None and box is not None:
+        raise ValueError(
+            "Time PDF cannot be both Gaussian and box shaped. "
+            "Please specify only one shape.")
+
+    # Create the minimizer instance.
+    if minimizer_impl == "LBFGS":
+        minimizer = Minimizer(LBFGSMinimizerImpl())
+    elif minimizer_impl == "minuit":
+        minimizer = Minimizer(IMinuitMinimizerImpl(ftol=1e-8))
+    else:
+        raise NameError(
+            f"Minimizer implementation `{minimizer_impl}` is not supported. "
+            "Please use `LBFGS` or `minuit`.")
+
+    # Define the flux model.
+    flux_model = PowerLawFlux(
+        Phi0=refplflux_Phi0, E0=refplflux_E0, gamma=refplflux_gamma)
+
+    # Define the fit parameter ns.
+    fitparam_ns = FitParameter('ns', ns_min, ns_max, ns_seed)
+
+    # Define the gamma fit parameter.
+    fitparam_gamma = FitParameter(
+        'gamma', valmin=gamma_min, valmax=gamma_max, initial=gamma_seed)
+
+    # Define the detector signal efficiency implementation method for the
+    # IceCube detector and this source and flux_model.
+    # The sin(dec) binning will be taken by the implementation method
+    # automatically from the Dataset instance.
+    gamma_grid = fitparam_gamma.as_linear_grid(delta=0.1)
+    detsigyield_implmethod = \
+        PublicDataPowerLawFluxPointLikeSourceI3DetSigYieldImplMethod(
+            gamma_grid)
+
+    # Define the signal generation method.
+    #sig_gen_method = PointLikeSourceI3SignalGenerationMethod()
+    sig_gen_method = None
+
+    # Create a source hypothesis group manager.
+    src_hypo_group_manager = SourceHypoGroupManager(
+        SourceHypoGroup(
+            source, flux_model, detsigyield_implmethod, sig_gen_method))
+
+    # Create a source fit parameter mapper and define the fit parameters.
+    src_fitparam_mapper = SingleSourceFitParameterMapper()
+    src_fitparam_mapper.def_fit_parameter(fitparam_gamma)
+
+    # Define the test statistic.
+    test_statistic = TestStatisticWilks()
+
+    # Create the Analysis instance.
+    analysis = TimeIntegratedMultiDatasetSingleSourceAnalysis(
+        src_hypo_group_manager,
+        src_fitparam_mapper,
+        fitparam_ns,
+        test_statistic,
+        sig_generator_cls=PDTimeDependentSignalGenerator
+    )
+
+    # Define the event selection method for pure optimization purposes.
+    # We will use the same method for all datasets.
+    event_selection_method = SpatialBoxEventSelectionMethod(
+        src_hypo_group_manager, delta_angle=np.deg2rad(optimize_delta_angle))
+
+    # Prepare the spline parameters.
+    if cut_sindec is None:
+        cut_sindec = np.sin(np.radians([-2, 0, -3, 0, 0]))
+    if spl_smooth is None:
+        spl_smooth = [0., 0.005, 0.05, 0.2, 0.3]
+    if len(spl_smooth) < len(datasets) or len(cut_sindec) < len(datasets):
+        raise AssertionError(
+            "The lengths of spl_smooth and cut_sindec must be at least the "
+            f"number of datasets: {len(datasets)}.")
+
+    # Add the data sets to the analysis.
+    pbar = ProgressBar(len(datasets), parent=ppbar).start()
+    data_list = []
+    energy_cut_splines = []
+    for idx, ds in enumerate(datasets):
+        # Load the data of the data set.
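+        # For the public data, only the experimental data, the good run list,
+        # and the livetime are loaded; no event-level Monte-Carlo data is
+        # required.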
+        data = ds.load_and_prepare_data(
+            keep_fields=keep_data_fields,
+            compress=compress_data,
+            tl=tl)
+        data_list.append(data)
+
+        # Create a trial data manager and add the required data fields.
+        tdm = TrialDataManager()
+        tdm.add_source_data_field('src_array',
+                                  pointlikesource_to_data_field_array)
+        tdm.add_data_field('psi', psi_func)
+
+        sin_dec_binning = ds.get_binning_definition('sin_dec')
+        log_energy_binning = ds.get_binning_definition('log_energy')
+
+        # Create the spatial PDF ratio instance for this dataset.
+        spatial_sigpdf = RayleighPSFPointSourceSignalSpatialPDF(
+            dec_range=np.arcsin(sin_dec_binning.range))
+        spatial_bkgpdf = DataBackgroundI3SpatialPDF(
+            data.exp, sin_dec_binning)
+        spatial_pdfratio = SpatialSigOverBkgPDFRatio(
+            spatial_sigpdf, spatial_bkgpdf)
+
+        # Create the energy PDF ratio instance for this dataset.
+        energy_sigpdfset = PDSignalEnergyPDFSet(
+            ds=ds,
+            src_dec=source.dec,
+            flux_model=flux_model,
+            fitparam_grid_set=gamma_grid,
+            ppbar=ppbar
+        )
+        smoothing_filter = BlockSmoothingFilter(nbins=1)
+        energy_bkgpdf = PDDataBackgroundI3EnergyPDF(
+            data.exp, log_energy_binning, sin_dec_binning,
+            smoothing_filter, kde_smoothing)
+
+        energy_pdfratio = PDPDFRatio(
+            sig_pdf_set=energy_sigpdfset,
+            bkg_pdf=energy_bkgpdf,
+            cap_ratio=cap_ratio
+        )
+
+        pdfratios = [spatial_pdfratio, energy_pdfratio]
+
+        # Create the time PDF ratio instance for this dataset.
+        if gauss is not None or box is not None:
+            time_bkgpdf = BackgroundUniformTimePDF(data.grl)
+            if gauss is not None:
+                time_sigpdf = SignalGaussTimePDF(
+                    data.grl, gauss['mu'], gauss['sigma'])
+            elif box is not None:
+                time_sigpdf = SignalBoxTimePDF(
+                    data.grl, box["start"], box["end"])
+            time_pdfratio = SigOverBkgPDFRatio(
+                sig_pdf=time_sigpdf,
+                bkg_pdf=time_bkgpdf,
+                pdf_type=TimePDF
+            )
+            pdfratios.append(time_pdfratio)
+
+        analysis.add_dataset(
+            ds, data, pdfratios, tdm, event_selection_method)
+
+        energy_cut_spline = create_energy_cut_spline(
+            ds, data.exp, spl_smooth[idx])
+        energy_cut_splines.append(energy_cut_spline)
+
+        pbar.increment()
+    pbar.finish()
+
+    analysis.llhratio = analysis.construct_llhratio(minimizer, ppbar=ppbar)
+
+    # Define the data scrambler with its data scrambling method, which is used
+    # for background generation.
+
+    # FIXME: Support multiple datasets for the DataScrambler.
+    data_scrambler = DataScrambler(
+        I3SeasonalVariationTimeScramblingMethod(data_list[0]))
+    # Create the background generation method.
+    bkg_gen_method = FixedScrambledExpDataI3BkgGenMethod(data_scrambler)
+
+    analysis.bkg_gen_method = bkg_gen_method
+    analysis.construct_background_generator()
+
+    analysis.construct_signal_generator(
+        llhratio=analysis.llhratio, energy_cut_splines=energy_cut_splines,
+        cut_sindec=cut_sindec, box=box, gauss=gauss)
+
+    return analysis
diff --git a/skyllh/analyses/i3/publicdata_ps/time_integrated_ps.py b/skyllh/analyses/i3/publicdata_ps/time_integrated_ps.py
new file mode 100644
index 0000000000..a111c59567
--- /dev/null
+++ b/skyllh/analyses/i3/publicdata_ps/time_integrated_ps.py
@@ -0,0 +1,461 @@
+# -*- coding: utf-8 -*-
+
+"""The time_integrated_ps analysis is a multi-dataset time-integrated single
+source analysis with a two-component likelihood function using a spatial and
+an energy event PDF.
+"""
+
+import argparse
+import logging
+import numpy as np
+
+from skyllh.core.progressbar import ProgressBar
+
+# Classes to define the source hypothesis.
+from skyllh.physics.source import PointLikeSource +from skyllh.physics.flux import PowerLawFlux +from skyllh.core.source_hypo_group import SourceHypoGroup +from skyllh.core.source_hypothesis import SourceHypoGroupManager + +# Classes to define the fit parameters. +from skyllh.core.parameters import ( + SingleSourceFitParameterMapper, + FitParameter +) + +# Classes for the minimizer. +from skyllh.core.minimizer import Minimizer, LBFGSMinimizerImpl +from skyllh.core.minimizers.iminuit import IMinuitMinimizerImpl + +# Classes for utility functionality. +from skyllh.core.config import CFG +from skyllh.core.random import RandomStateService +from skyllh.core.optimize import SpatialBoxEventSelectionMethod +from skyllh.core.smoothing import BlockSmoothingFilter +from skyllh.core.timing import TimeLord +from skyllh.core.trialdata import TrialDataManager + +# Classes for defining the analysis. +from skyllh.core.test_statistic import TestStatisticWilks +from skyllh.core.analysis import ( + TimeIntegratedMultiDatasetSingleSourceAnalysis as Analysis +) + +# Classes to define the background generation. +from skyllh.core.scrambling import DataScrambler, UniformRAScramblingMethod +from skyllh.i3.background_generation import FixedScrambledExpDataI3BkgGenMethod + +# Classes to define the signal and background PDFs. +from skyllh.core.signalpdf import RayleighPSFPointSourceSignalSpatialPDF +from skyllh.i3.backgroundpdf import ( + DataBackgroundI3SpatialPDF +) + +# Classes to define the spatial and energy PDF ratios. +from skyllh.core.pdfratio import SpatialSigOverBkgPDFRatio + +# Analysis utilities. +from skyllh.core.analysis_utils import ( + pointlikesource_to_data_field_array +) + +# Logging setup utilities. +from skyllh.core.debugging import ( + setup_logger, + setup_console_handler, + setup_file_handler +) + +# Pre-defined public IceCube data samples. +from skyllh.datasets.i3 import data_samples + +# Analysis specific classes for working with the public data. +from skyllh.analyses.i3.publicdata_ps.signal_generator import PDSignalGenerator + +from skyllh.analyses.i3.publicdata_ps.detsigyield import ( + PublicDataPowerLawFluxPointLikeSourceI3DetSigYieldImplMethod +) +from skyllh.analyses.i3.publicdata_ps.signalpdf import ( + PDSignalEnergyPDFSet +) +from skyllh.analyses.i3.publicdata_ps.pdfratio import ( + PDPDFRatio +) +from skyllh.analyses.i3.publicdata_ps.backgroundpdf import ( + PDDataBackgroundI3EnergyPDF +) +from skyllh.analyses.i3.publicdata_ps.utils import create_energy_cut_spline + + +def psi_func(tdm, src_hypo_group_manager, fitparams): + """Function to calculate the opening angle between the source position + and the event's reconstructed position. + """ + ra = tdm.get_data('ra') + dec = tdm.get_data('dec') + + # Make the source position angles two-dimensional so the PDF value + # can be calculated via numpy broadcasting automatically for several + # sources. This is useful for stacking analyses. + src_ra = tdm.get_data('src_array')['ra'][:, np.newaxis] + src_dec = tdm.get_data('src_array')['dec'][:, np.newaxis] + + delta_dec = np.abs(dec - src_dec) + delta_ra = np.abs(ra - src_ra) + x = ( + (np.sin(delta_dec / 2.))**2. + np.cos(dec) * + np.cos(src_dec) * (np.sin(delta_ra / 2.))**2. + ) + + # Handle possible floating precision errors. + x[x < 0.] = 0. + x[x > 1.] = 1. + + psi = (2.0*np.arcsin(np.sqrt(x))) + + # For now we support only a single source, hence return psi[0]. 
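+    # psi has shape (n_sources, n_events); with a single source this selects
+    # the (n_events,)-shaped row.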
+    return psi[0, :]
+
+
+def TXS_location():
+    src_ra = np.radians(77.358)
+    src_dec = np.radians(5.693)
+    return (src_ra, src_dec)
+
+
+def create_analysis(
+    datasets,
+    source,
+    refplflux_Phi0=1,
+    refplflux_E0=1e3,
+    refplflux_gamma=2.0,
+    ns_seed=100.0,
+    ns_min=0.,
+    ns_max=1e3,
+    gamma_seed=3.0,
+    gamma_min=1.,
+    gamma_max=5.,
+    kde_smoothing=False,
+    minimizer_impl="LBFGS",
+    cut_sindec=None,
+    spl_smooth=None,
+    cap_ratio=False,
+    compress_data=False,
+    keep_data_fields=None,
+    optimize_delta_angle=10,
+    tl=None,
+    ppbar=None
+):
+    """Creates the Analysis instance for this particular analysis.
+
+    Parameters:
+    -----------
+    datasets : list of Dataset instances
+        The list of Dataset instances, which should be used in the
+        analysis.
+    source : PointLikeSource instance
+        The PointLikeSource instance defining the point source position.
+    refplflux_Phi0 : float
+        The flux normalization to use for the reference power law flux model.
+    refplflux_E0 : float
+        The reference energy to use for the reference power law flux model.
+    refplflux_gamma : float
+        The spectral index to use for the reference power law flux model.
+    ns_seed : float
+        Value to seed the minimizer with for the ns fit.
+    ns_min : float
+        Lower bound for the ns fit.
+    ns_max : float
+        Upper bound for the ns fit.
+    gamma_seed : float | None
+        Value to seed the minimizer with for the gamma fit. If set to None,
+        the refplflux_gamma value will be set as gamma_seed.
+    gamma_min : float
+        Lower bound for the gamma fit.
+    gamma_max : float
+        Upper bound for the gamma fit.
+    kde_smoothing : bool
+        Apply a KDE-based smoothing to the data-driven background pdf.
+        Default: False.
+    minimizer_impl : str | "LBFGS"
+        Minimizer implementation to be used. Supported options are "LBFGS"
+        (L-BFGS minimizer used from the :mod:`scipy.optimize` module), or
+        "minuit" (Minuit minimizer used by the :mod:`iminuit` module).
+        Default: "LBFGS".
+    cut_sindec : list of float | None
+        sin(dec) values at which the energy cut in the southern sky should
+        start. If None, np.sin(np.radians([-2, 0, -3, 0, 0])) is used.
+    spl_smooth : list of float | None
+        Smoothing parameters for the 1D spline for the energy cut. If None,
+        [0., 0.005, 0.05, 0.2, 0.3] is used.
+    cap_ratio : bool
+        If set to True, the energy PDF ratio will be capped to a finite value
+        where no background energy PDF information is available. This will
+        ensure that an energy PDF ratio is available for high energies where
+        no background is available from the experimental data.
+        If kde_smoothing is set to True, cap_ratio should be set to False!
+        Default is False.
+    compress_data : bool
+        Flag if the data should get converted from float64 into float32.
+    keep_data_fields : list of str | None
+        List of additional data field names that should get kept when loading
+        the data.
+    optimize_delta_angle : float
+        The delta angle in degrees for the event selection optimization
+        methods.
+    tl : TimeLord instance | None
+        The TimeLord instance to use to time the creation of the analysis.
+    ppbar : ProgressBar instance | None
+        The instance of ProgressBar for the optional parent progress bar.
+
+    Returns
+    -------
+    analysis : TimeIntegratedMultiDatasetSingleSourceAnalysis
+        The Analysis instance for this analysis.
+    """
+
+    # Create the minimizer instance.
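+    # Note: Only the scipy L-BFGS and the iminuit Minuit minimizers are
+    # supported by this analysis.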
+    if minimizer_impl == "LBFGS":
+        minimizer = Minimizer(LBFGSMinimizerImpl())
+    elif minimizer_impl == "minuit":
+        minimizer = Minimizer(IMinuitMinimizerImpl(ftol=1e-8))
+    else:
+        raise NameError(
+            f"Minimizer implementation `{minimizer_impl}` is not supported. "
+            "Please use `LBFGS` or `minuit`.")
+
+    # Define the flux model.
+    flux_model = PowerLawFlux(
+        Phi0=refplflux_Phi0, E0=refplflux_E0, gamma=refplflux_gamma)
+
+    # Define the fit parameter ns.
+    fitparam_ns = FitParameter('ns', ns_min, ns_max, ns_seed)
+
+    # Define the gamma fit parameter.
+    fitparam_gamma = FitParameter(
+        'gamma', valmin=gamma_min, valmax=gamma_max, initial=gamma_seed)
+
+    # Define the detector signal efficiency implementation method for the
+    # IceCube detector and this source and flux_model.
+    # The sin(dec) binning will be taken by the implementation method
+    # automatically from the Dataset instance.
+    gamma_grid = fitparam_gamma.as_linear_grid(delta=0.1)
+    detsigyield_implmethod = \
+        PublicDataPowerLawFluxPointLikeSourceI3DetSigYieldImplMethod(
+            gamma_grid)
+
+    # Define the signal generation method.
+    #sig_gen_method = PointLikeSourceI3SignalGenerationMethod()
+    sig_gen_method = None
+
+    # Create a source hypothesis group manager.
+    src_hypo_group_manager = SourceHypoGroupManager(
+        SourceHypoGroup(
+            source, flux_model, detsigyield_implmethod, sig_gen_method))
+
+    # Create a source fit parameter mapper and define the fit parameters.
+    src_fitparam_mapper = SingleSourceFitParameterMapper()
+    src_fitparam_mapper.def_fit_parameter(fitparam_gamma)
+
+    # Define the test statistic.
+    test_statistic = TestStatisticWilks()
+
+    # Define the data scrambler with its data scrambling method, which is used
+    # for background generation.
+    data_scrambler = DataScrambler(UniformRAScramblingMethod())
+
+    # Create background generation method.
+    bkg_gen_method = FixedScrambledExpDataI3BkgGenMethod(data_scrambler)
+
+    # Create the Analysis instance.
+    analysis = Analysis(
+        src_hypo_group_manager,
+        src_fitparam_mapper,
+        fitparam_ns,
+        test_statistic,
+        bkg_gen_method,
+        sig_generator_cls=PDSignalGenerator
+    )
+
+    # Define the event selection method for pure optimization purposes.
+    # We will use the same method for all datasets.
+    event_selection_method = SpatialBoxEventSelectionMethod(
+        src_hypo_group_manager, delta_angle=np.deg2rad(optimize_delta_angle))
+
+    # Prepare the spline parameters.
+    if cut_sindec is None:
+        cut_sindec = np.sin(np.radians([-2, 0, -3, 0, 0]))
+    if spl_smooth is None:
+        spl_smooth = [0., 0.005, 0.05, 0.2, 0.3]
+    if len(spl_smooth) < len(datasets) or len(cut_sindec) < len(datasets):
+        raise AssertionError(
+            "The lengths of spl_smooth and cut_sindec must be at least the "
+            f"number of datasets: {len(datasets)}.")
+
+    # Add the data sets to the analysis.
+    pbar = ProgressBar(len(datasets), parent=ppbar).start()
+    energy_cut_splines = []
+    for idx, ds in enumerate(datasets):
+        # Load the data of the data set.
+        data = ds.load_and_prepare_data(
+            keep_fields=keep_data_fields,
+            compress=compress_data,
+            tl=tl)
+
+        # Create a trial data manager and add the required data fields.
+        tdm = TrialDataManager()
+        tdm.add_source_data_field('src_array',
+                                  pointlikesource_to_data_field_array)
+        tdm.add_data_field('psi', psi_func)
+
+        sin_dec_binning = ds.get_binning_definition('sin_dec')
+        log_energy_binning = ds.get_binning_definition('log_energy')
+
+        # Create the spatial PDF ratio instance for this dataset.
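+        # The signal PDF follows the Rayleigh PSF model, while the background
+        # PDF is built from the sin(dec) distribution of the experimental
+        # data.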
+        spatial_sigpdf = RayleighPSFPointSourceSignalSpatialPDF(
+            dec_range=np.arcsin(sin_dec_binning.range))
+        spatial_bkgpdf = DataBackgroundI3SpatialPDF(
+            data.exp, sin_dec_binning)
+        spatial_pdfratio = SpatialSigOverBkgPDFRatio(
+            spatial_sigpdf, spatial_bkgpdf)
+
+        # Create the energy PDF ratio instance for this dataset.
+        energy_sigpdfset = PDSignalEnergyPDFSet(
+            ds=ds,
+            src_dec=source.dec,
+            flux_model=flux_model,
+            fitparam_grid_set=gamma_grid,
+            ppbar=ppbar
+        )
+        smoothing_filter = BlockSmoothingFilter(nbins=1)
+        energy_bkgpdf = PDDataBackgroundI3EnergyPDF(
+            data.exp, log_energy_binning, sin_dec_binning,
+            smoothing_filter, kde_smoothing)
+
+        energy_pdfratio = PDPDFRatio(
+            sig_pdf_set=energy_sigpdfset,
+            bkg_pdf=energy_bkgpdf,
+            cap_ratio=cap_ratio
+        )
+
+        pdfratios = [spatial_pdfratio, energy_pdfratio]
+
+        analysis.add_dataset(
+            ds, data, pdfratios, tdm, event_selection_method)
+
+        energy_cut_spline = create_energy_cut_spline(
+            ds, data.exp, spl_smooth[idx])
+        energy_cut_splines.append(energy_cut_spline)
+
+        pbar.increment()
+    pbar.finish()
+
+    analysis.llhratio = analysis.construct_llhratio(minimizer, ppbar=ppbar)
+    analysis.construct_signal_generator(
+        llhratio=analysis.llhratio, energy_cut_splines=energy_cut_splines,
+        cut_sindec=cut_sindec)
+
+    return analysis
+
+
+if(__name__ == '__main__'):
+    p = argparse.ArgumentParser(
+        description='Calculates TS for a given source location using the '
+                    '10-year public point source sample.',
+        formatter_class=argparse.RawTextHelpFormatter
+    )
+    p.add_argument(
+        '--dec',
+        default=23.8,
+        type=float,
+        help='The source declination in degrees.'
+    )
+    p.add_argument(
+        '--ra',
+        default=216.76,
+        type=float,
+        help='The source right-ascension in degrees.'
+    )
+    p.add_argument(
+        '--gamma-seed',
+        default=3,
+        type=float,
+        help='The seed value of the gamma fit parameter.'
+    )
+    p.add_argument(
+        '--data_base_path',
+        default=None,
+        type=str,
+        help='The base path to the data samples (default=None).'
+    )
+    p.add_argument(
+        '--seed',
+        default=1,
+        type=int,
+        help='The random number generator seed for the likelihood '
+             'minimization.'
+    )
+    p.add_argument(
+        '--ncpu',
+        default=1,
+        type=int,
+        help='The number of CPUs to utilize where parallelization is '
+             'possible.'
+    )
+    p.add_argument(
+        '--cap-ratio',
+        action='store_true',
+        help='Switch to cap the energy PDF ratio.')
+    p.set_defaults(cap_ratio=False)
+    args = p.parse_args()
+
+    # Setup `skyllh` package logging.
+    # To optimize logging set the logging level to the lowest handling level.
+    setup_logger('skyllh', logging.DEBUG)
+    log_format = '%(asctime)s %(processName)s %(name)s %(levelname)s: '\
+        '%(message)s'
+    setup_console_handler('skyllh', logging.INFO, log_format)
+    setup_file_handler('skyllh', 'debug.log',
+                       log_level=logging.DEBUG,
+                       log_format=log_format)
+
+    CFG['multiproc']['ncpu'] = args.ncpu
+
+    sample_seasons = [
+        ('PublicData_10y_ps', 'IC40'),
+        ('PublicData_10y_ps', 'IC59'),
+        ('PublicData_10y_ps', 'IC79'),
+        ('PublicData_10y_ps', 'IC86_I'),
+        ('PublicData_10y_ps', 'IC86_II-VII'),
+    ]
+
+    datasets = []
+    for (sample, season) in sample_seasons:
+        # Get the dataset from the correct dataset collection.
+        dsc = data_samples[sample].create_dataset_collection(
+            args.data_base_path)
+        datasets.append(dsc.get_dataset(season))
+
+    # Define a random state service.
+    rss = RandomStateService(args.seed)
+
+    # Define the point source.
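+    # PointLikeSource expects the right-ascension and declination in radians.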
+    source = PointLikeSource(np.deg2rad(args.ra), np.deg2rad(args.dec))
+    print('source: ', str(source))
+
+    tl = TimeLord()
+
+    with tl.task_timer('Creating analysis.'):
+        ana = create_analysis(
+            datasets,
+            source,
+            cap_ratio=args.cap_ratio,
+            gamma_seed=args.gamma_seed,
+            tl=tl)
+
+    with tl.task_timer('Unblinding data.'):
+        (TS, fitparam_dict, status) = ana.unblind(rss)
+
+    print('TS = %g' % (TS))
+    print('ns_fit = %g' % (fitparam_dict['ns']))
+    print('gamma_fit = %g' % (fitparam_dict['gamma']))
+
+    print(tl)
diff --git a/skyllh/analyses/i3/publicdata_ps/utils.py b/skyllh/analyses/i3/publicdata_ps/utils.py
new file mode 100644
index 0000000000..00c381f37b
--- /dev/null
+++ b/skyllh/analyses/i3/publicdata_ps/utils.py
@@ -0,0 +1,255 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+
+from scipy import interpolate
+from scipy import integrate
+
+from skyllh.core.binning import get_bincenters_from_binedges
+
+
+class FctSpline1D(object):
+    """Class to represent a 1D function spline using the PchipInterpolator
+    class from scipy.
+
+    To evaluate the spline, use the ``__call__`` method.
+    """
+
+    def __init__(self, f, x_binedges, norm=False, **kwargs):
+        """Creates a new 1D function spline using the PchipInterpolator
+        class from scipy.
+
+        Parameters
+        ----------
+        f : (n_x,)-shaped 1D numpy ndarray
+            The numpy ndarray holding the function values at the bin centers.
+        x_binedges : (n_x+1,)-shaped 1D numpy ndarray
+            The numpy ndarray holding the bin edges of the x-axis.
+        norm : bool
+            Whether to precalculate and save the normalization internally.
+        """
+        super().__init__(**kwargs)
+
+        self.x_binedges = np.copy(x_binedges)
+
+        self.x_min = self.x_binedges[0]
+        self.x_max = self.x_binedges[-1]
+
+        x = get_bincenters_from_binedges(self.x_binedges)
+
+        self.spl_f = interpolate.PchipInterpolator(
+            x, f, extrapolate=False
+        )
+
+        self.norm = None
+        if norm:
+            self.norm = integrate.quad(
+                self.__call__,
+                self.x_min,
+                self.x_max,
+                limit=200,
+                full_output=1
+            )[0]
+
+    def __call__(self, x, oor_value=0):
+        """Evaluates the spline at the given x values. For x-values
+        outside the spline's range, the oor_value is returned.
+
+        Parameters
+        ----------
+        x : (n_x,)-shaped 1D numpy ndarray
+            The numpy ndarray holding the x values at which the spline should
+            get evaluated.
+        oor_value : float
+            The value for out-of-range (oor) coordinates.
+
+        Returns
+        -------
+        f : (n_x,)-shaped 1D numpy ndarray
+            The numpy ndarray holding the evaluated values of the spline.
+        """
+        f = self.spl_f(x)
+        f = np.nan_to_num(f, nan=oor_value)
+
+        return f
+
+    def evaluate(self, *args, **kwargs):
+        """Alias for the __call__ method.
+        """
+        return self(*args, **kwargs)
+
+
+class FctSpline2D(object):
+    """Class to represent a 2D function spline using the RectBivariateSpline
+    class from scipy.
+
+    The spline is constructed in the log10 space of the function value to
+    ensure a smooth spline.
+
+    To evaluate the spline, use the ``__call__`` method.
+    """
+
+    def __init__(self, f, x_binedges, y_binedges, **kwargs):
+        """Creates a new 2D function spline using the RectBivariateSpline
+        class from scipy.
+
+        Parameters
+        ----------
+        f : (n_x, n_y)-shaped 2D numpy ndarray
+            The numpy ndarray holding the function values at the bin centers.
+        x_binedges : (n_x+1,)-shaped 1D numpy ndarray
+            The numpy ndarray holding the bin edges of the x-axis.
+        y_binedges : (n_y+1,)-shaped 1D numpy ndarray
+            The numpy ndarray holding the bin edges of the y-axis.
+ """ + super().__init__(**kwargs) + + self.x_binedges = np.copy(x_binedges) + self.y_binedges = np.copy(y_binedges) + + self.x_min = self.x_binedges[0] + self.x_max = self.x_binedges[-1] + self.y_min = self.y_binedges[0] + self.y_max = self.y_binedges[-1] + + x = get_bincenters_from_binedges(self.x_binedges) + y = get_bincenters_from_binedges(self.y_binedges) + + # Note: For simplicity we approximate zero bins with 1000x smaller + # values than the minimum value. To do this correctly, one should store + # the zero bins and return zero when those bins are requested. + z = np.empty(f.shape, dtype=np.double) + m = f > 0 + z[m] = np.log10(f[m]) + z[np.invert(m)] = np.min(z[m]) - 3 + + self.spl_log10_f = interpolate.RectBivariateSpline( + x, y, z, kx=3, ky=3, s=0) + + def __call__(self, x, y, oor_value=0): + """Evaluates the spline at the given coordinates. For coordinates + outside the spline's range, the oor_value is returned. + + Parameters + ---------- + x : (n_x,)-shaped 1D numpy ndarray + The numpy ndarray holding the x values at which the spline should + get evaluated. + y : (n_y,)-shaped 1D numpy ndarray + The numpy ndarray holding the y values at which the spline should + get evaluated. + oor_value : float + The value for out-of-range (oor) coordinates. + + Returns + ------- + f : (n_x, n_y)-shaped 2D numpy ndarray + The numpy ndarray holding the evaluated values of the spline. + """ + m_x_oor = (x < self.x_min) | (x > self.x_max) + m_y_oor = (y < self.y_min) | (y > self.y_max) + + (m_xx_oor, m_yy_oor) = np.meshgrid(m_x_oor, m_y_oor, indexing='ij') + m_xy_oor = m_xx_oor | m_yy_oor + + f = np.power(10, self.spl_log10_f(x, y)) + f[m_xy_oor] = oor_value + + return f + + +def psi_to_dec_and_ra(rss, src_dec, src_ra, psi): + """Generates random declinations and right-ascension coordinates for the + given source location and opening angle `psi`. + + Parameters + ---------- + rss : instance of RandomStateService + The instance of RandomStateService to use for drawing random numbers. + src_dec : float + The declination of the source in radians. + src_ra : float + The right-ascension of the source in radians. + psi : 1d ndarray of float + The opening-angle values in radians. + + Returns + ------- + dec : 1d ndarray of float + The declination values. + ra : 1d ndarray of float + The right-ascension values. + """ + + psi = np.atleast_1d(psi) + + # Transform everything in radians and convert the source declination + # to source zenith angle + a = psi + b = np.pi/2 - src_dec + c = src_ra + # Random rotation angle for the 2D circle + t = rss.random.uniform(0, 2*np.pi, size=len(psi)) + + # Parametrize the circle + x = ( + (np.sin(a)*np.cos(b)*np.cos(c)) * np.cos(t) + + (np.sin(a)*np.sin(c)) * np.sin(t) - + (np.cos(a)*np.sin(b)*np.cos(c)) + ) + y = ( + -(np.sin(a)*np.cos(b)*np.sin(c)) * np.cos(t) + + (np.sin(a)*np.cos(c)) * np.sin(t) + + (np.cos(a)*np.sin(b)*np.sin(c)) + ) + z = ( + (np.sin(a)*np.sin(b)) * np.cos(t) + + (np.cos(a)*np.cos(b)) + ) + + # Convert back to right-ascension and declination. + # This is to distinguish between diametrically opposite directions. 
+    zen = np.arccos(z)
+    azi = np.arctan2(y, x)
+
+    dec = np.pi/2 - zen
+    ra = np.pi - azi
+
+    return (dec, ra)
+
+
+def create_energy_cut_spline(ds, exp_data, spl_smooth):
+    """Creates the spline for the declination-dependent energy cut
+    that the signal generator needs for injection in the southern sky.
+    Some special conditions are needed for IC79 and IC86_I, because
+    their experimental dataset shows events that should probably have
+    been cut by the IceCube selection.
+    """
+    data_exp = exp_data.copy(keep_fields=['sin_dec', 'log_energy'])
+    if ds.name == 'IC79':
+        m = np.invert(np.logical_and(
+            data_exp['sin_dec'] < -0.75,
+            data_exp['log_energy'] < 4.2))
+        data_exp = data_exp[m]
+    if ds.name == 'IC86_I':
+        m = np.invert(np.logical_and(
+            data_exp['sin_dec'] < -0.2,
+            data_exp['log_energy'] < 2.5))
+        data_exp = data_exp[m]
+
+    sin_dec_binning = ds.get_binning_definition('sin_dec')
+    sindec_edges = sin_dec_binning.binedges
+    min_log_e = np.zeros(len(sindec_edges)-1, dtype=float)
+    for i in range(len(sindec_edges)-1):
+        mask = np.logical_and(
+            data_exp['sin_dec'] >= sindec_edges[i],
+            data_exp['sin_dec'] < sindec_edges[i+1])
+        min_log_e[i] = np.min(data_exp['log_energy'][mask])
+    del data_exp
+    sindec_centers = 0.5 * (sindec_edges[1:]+sindec_edges[:-1])
+
+    spline = interpolate.UnivariateSpline(
+        sindec_centers, min_log_e, k=2, s=spl_smooth)
+
+    return spline
diff --git a/skyllh/core/analysis.py b/skyllh/core/analysis.py
index 8f06aa7ae0..0c1dae27d4 100644
--- a/skyllh/core/analysis.py
+++ b/skyllh/core/analysis.py
@@ -5,7 +5,6 @@
 import abc
 import numpy as np
-import pickle
 
 from skyllh.core.py import (
     classname,
@@ -15,16 +14,12 @@
 from skyllh.core.storage import DataFieldRecordArray
 from skyllh.core.dataset import (
     Dataset,
-    DatasetData
+    DatasetData,
 )
 from skyllh.core.parameters import (
     FitParameter,
     SourceFitParameterMapper,
-    SingleSourceFitParameterMapper
-)
-from skyllh.core.pdf import (
-    EnergyPDF,
-    SpatialPDF
+    SingleSourceFitParameterMapper,
 )
 from skyllh.core.pdfratio import PDFRatio
 from skyllh.core.progressbar import ProgressBar
@@ -35,15 +30,11 @@
     SingleSourceDatasetSignalWeights,
     SingleSourceZeroSigH0SingleDatasetTCLLHRatio,
     MultiSourceZeroSigH0SingleDatasetTCLLHRatio,
-    MultiSourceDatasetSignalWeights
+    MultiSourceDatasetSignalWeights,
 )
-from skyllh.core.scrambling import DataScramblingMethod
 from skyllh.core.timing import TaskTimer
 from skyllh.core.trialdata import TrialDataManager
-from skyllh.core.optimize import (
-    EventSelectionMethod,
-    AllEventSelectionMethod
-)
+from skyllh.core.optimize import EventSelectionMethod
 from skyllh.core.source_hypothesis import SourceHypoGroupManager
 from skyllh.core.test_statistic import TestStatistic
 from skyllh.core.multiproc import get_ncpu, parallelize
@@ -51,7 +42,7 @@
 from skyllh.core.background_generator import BackgroundGenerator
 from skyllh.core.signal_generator import (
     SignalGeneratorBase,
-    SignalGenerator
+    SignalGenerator,
 )
 from skyllh.physics.source import SourceModel
 
@@ -1034,7 +1025,7 @@ def do_trials(
         result_dtype = result_list[0].dtype
         result = np.empty(n, dtype=result_dtype)
-        result[:] = result_list[:]
+        result[:] = np.array(result_list)[:,0]
 
         return result
diff --git a/skyllh/core/analysis_utils.py b/skyllh/core/analysis_utils.py
index 8104874898..3d051939a0 100644
--- a/skyllh/core/analysis_utils.py
+++ b/skyllh/core/analysis_utils.py
@@ -1222,7 +1222,7 @@ def create_trial_data_file(
 def extend_trial_data_file(
     ana, rss, n_trials, trial_data,
     mean_n_sig=0, mean_n_sig_null=0, mean_n_bkg_list=None,
     bkg_kwargs=None, sig_kwargs=None,
bkg_kwargs=None, sig_kwargs=None, - pathfilename=None): + pathfilename=None, **kwargs): """Appends to the trial data file `n_trials` generated trials for each mean number of injected signal events up to `ns_max` for a given analysis. @@ -1266,6 +1266,12 @@ def extend_trial_data_file( `poisson`. pathfilename : string | None Trial data file path including the filename. + + Additional keyword arguments + ---------------------------- + Additional keyword arguments are passed-on to the ``create_trial_data_file`` + function. + Returns ------- trial_data : @@ -1278,11 +1284,19 @@ def extend_trial_data_file( enumerate(sorted(np.unique(trial_data['seed'])) + [None], 1) if i != e) rss.reseed(seed) + (seed, mean_n_sig, mean_n_sig_null, trials) = create_trial_data_file( - ana, rss, n_trials, - mean_n_sig=mean_n_sig) - trial_data = np_rfn.stack_arrays([trial_data, trials], usemask=False, - asrecarray=True) + ana=ana, + rss=rss, + n_trials=n_trials, + mean_n_sig=mean_n_sig, + **kwargs + ) + trial_data = np_rfn.stack_arrays( + [trial_data, trials], + usemask=False, + asrecarray=True) + if(pathfilename is not None): # Save the trial data to file. makedirs(os.path.dirname(pathfilename), exist_ok=True) diff --git a/skyllh/core/backgroundpdf.py b/skyllh/core/backgroundpdf.py index 1c12a52966..cb41ff7eb4 100644 --- a/skyllh/core/backgroundpdf.py +++ b/skyllh/core/backgroundpdf.py @@ -7,9 +7,12 @@ from skyllh.core.pdf import ( IsBackgroundPDF, MultiDimGridPDF, - NDPhotosplinePDF + NDPhotosplinePDF, + TimePDF, ) +import numpy as np + class BackgroundMultiDimGridPDF(MultiDimGridPDF, IsBackgroundPDF): """This class provides a multi-dimensional background PDF. The PDF is @@ -97,3 +100,92 @@ def __init__( path_to_pdf_splinefit=path_to_pdf_splinefit, norm_factor_func=norm_factor_func ) + + +class BackgroundUniformTimePDF(TimePDF, IsBackgroundPDF): + + def __init__(self, grl): + """Creates a new background time PDF instance as uniform background + + Parameters + ---------- + grl : ndarray + Array of the detector good run list + + """ + super(BackgroundUniformTimePDF, self).__init__() + self.start = grl["start"][0] + self.end = grl["stop"][-1] + self.grl = grl + + + def cdf(self, t): + """Compute the cumulative density function for the box pdf. This is + needed for normalization. + + Parameters + ---------- + t : float, ndarray + MJD times + + Returns + ------- + cdf : float, ndarray + Values of cumulative density function evaluated at t + """ + t_start = self.grl["start"][0] + t_end = self.grl["stop"][-1] + t = np.atleast_1d(t) + + cdf = np.zeros(t.size, float) + + # values between start and stop times + mask = (t_start <= t) & (t <= t_end) + cdf[mask] = (t[mask] - t_start) / [t_end - t_start] + + # take care of values beyond stop time in sample + + return cdf + + def norm_uptime(self): + """Compute the normalization with the dataset uptime. Distributions like + scipy.stats.norm are normalized (-inf, inf). + These must be re-normalized such that the function sums to 1 over the + finite good run list domain. + + Returns + ------- + norm : float + Normalization such that cdf sums to 1 over good run list domain + """ + + integral = (self.cdf(self.grl["stop"]) - self.cdf(self.grl["start"])).sum() + + if np.isclose(integral, 0): + return 0 + + return 1. / integral + + def get_prob(self, tdm, fitparams=None, tl=None): + """Calculates the background time probability density of each event. + + tdm : TrialDataManager + Unused interface argument. + fitparams : None + Unused interface argument. 
+        tl : instance of TimeLord | None
+            The optional instance of TimeLord that should be used to collect
+            timing information about this method.
+
+        Returns
+        -------
+        pd : float
+            The constant probability density 1/livetime, which holds for
+            every event.
+        grads : empty 1d ndarray of float
+            The PDF does not depend on any fit parameter, hence there are no
+            gradients.
+        """
+        livetime = self.grl["stop"][-1] - self.grl["start"][0]
+        pd = 1./livetime
+        grads = np.array([], dtype=np.double)
+
+        return (pd, grads)
diff --git a/skyllh/core/binning.py b/skyllh/core/binning.py
index 4e3c4d55f0..f9918f9f65 100644
--- a/skyllh/core/binning.py
+++ b/skyllh/core/binning.py
@@ -2,8 +2,98 @@
 
 import numpy as np
 
+from scipy.linalg import solve
+
 from skyllh.core.py import classname
 
+
+def get_bincenters_from_binedges(edges):
+    """Calculates the bin center values from the given bin edge values.
+
+    Parameters
+    ----------
+    edges : 1D numpy ndarray
+        The (n+1,)-shaped 1D ndarray holding the bin edge values.
+
+    Returns
+    -------
+    bincenters : 1D numpy ndarray
+        The (n,)-shaped 1D ndarray holding the bin center values.
+    """
+    return 0.5*(edges[:-1] + edges[1:])
+
+def get_binedges_from_bincenters(centers):
+    """Calculates the bin edges from the given bin center values. The bin
+    center values must be evenly spaced.
+
+    Parameters
+    ----------
+    centers : 1D numpy ndarray
+        The (n,)-shaped 1D ndarray holding the bin center values.
+
+    Returns
+    -------
+    edges : 1D numpy ndarray
+        The (n+1,)-shaped 1D ndarray holding the bin edge values.
+    """
+    d = np.diff(centers)
+    if not np.all(np.isclose(np.diff(d), 0)):
+        raise ValueError('The bin center values are not evenly spaced!')
+    d = d[0]
+
+    edges = np.zeros((len(centers)+1,), dtype=np.double)
+    edges[:-1] = centers - d/2
+    edges[-1] = centers[-1] + d/2
+
+    return edges
+
+def get_bin_indices_from_lower_and_upper_binedges(le, ue, values):
+    """Returns the bin indices for the given values which must fall into bins
+    defined by the given lower and upper bin edges.
+
+    Note: The upper edge is not included in the bin.
+
+    Parameters
+    ----------
+    le : (m,)-shaped 1D numpy ndarray
+        The lower bin edges.
+    ue : (m,)-shaped 1D numpy ndarray
+        The upper bin edges.
+    values : (n,)-shaped 1D numpy ndarray
+        The values for which to get the bin indices.
+
+    Returns
+    -------
+    idxs : (n,)-shaped 1D numpy ndarray
+        The bin indices of the given values.
+    """
+    if len(le) != len(ue):
+        raise ValueError(
+            'The lower and upper edge arrays must be of the same size, but '
+            'are of size {} and {}!'.format(
+                len(le), len(ue)))
+
+    if np.any(values < le[0]):
+        invalid_values = values[values < le[0]]
+        raise ValueError(
+            '{} values ({}) are smaller than the lowest bin edge ({})!'.format(
+                len(invalid_values), str(invalid_values), le[0]))
+    if np.any(values >= ue[-1]):
+        invalid_values = values[values >= ue[-1]]
+        raise ValueError(
+            '{} values ({}) are larger than or equal to the largest bin edge '
+            '({})!'.format(
+                len(invalid_values), str(invalid_values), ue[-1]))
+
+    m = (
+        (values[:,np.newaxis] >= le[np.newaxis,:]) &
+        (values[:,np.newaxis] < ue[np.newaxis,:])
+    )
+    idxs = np.nonzero(m)[1]
+
+    return idxs
+
+
 class BinningDefinition(object):
     """The BinningDefinition class provides a structure to hold histogram
     binning definitions for an analyis.
@@ -74,6 +164,12 @@ def bincenters(self):
         """
         return 0.5*(self._binedges[:-1] + self._binedges[1:])
 
+    @property
+    def binwidths(self):
+        """(read-only) The widths of the bins.
+ """ + return np.diff(self._binedges) + @property def lower_edge(self): """The lowest bin edge of the binning. @@ -110,6 +206,15 @@ def any_data_out_of_binning_range(self, data): (data > self.upper_edge)) return outofrange + def get_binwidth_from_value(self, value): + """Returns the width of the bin the given value falls into. + """ + idx = np.digitize(value, self._binedges) - 1 + + bin_width = self.binwidths[idx] + + return bin_width + def get_subset(self, lower_edge, upper_edge): """Creates a new BinningDefinition instance which contains only a subset of the bins of this BinningDefinition instance. The range of the subset diff --git a/skyllh/core/dataset.py b/skyllh/core/dataset.py index ff5b804a88..ecefc308bd 100644 --- a/skyllh/core/dataset.py +++ b/skyllh/core/dataset.py @@ -116,8 +116,9 @@ def __init__( exp_pathfilenames : str | sequence of str | None The file name(s), including paths, of the experimental data file(s). This can be None, if a MC-only study is performed. - mc_pathfilenames : str | sequence of str + mc_pathfilenames : str | sequence of str | None The file name(s), including paths, of the monte-carlo data file(s). + This can be None, if a MC-less analysis is performed. livetime : float | None The integrated live-time in days of the dataset. It can be None for cases where the live-time is retrieved directly from the data files @@ -223,6 +224,8 @@ def mc_pathfilename_list(self): return self._mc_pathfilename_list @mc_pathfilename_list.setter def mc_pathfilename_list(self, pathfilenames): + if(pathfilenames is None): + pathfilenames = [] if(isinstance(pathfilenames, str)): pathfilenames = [pathfilenames] if(not issequenceof(pathfilenames, str)): @@ -702,10 +705,11 @@ def _conv_new2orig_field_names(new_field_names, orig2new_renaming_dict): # Load the experimental data if there is any. if(len(self._exp_pathfilename_list) > 0): - fileloader_exp = create_FileLoader(self.exp_abs_pathfilename_list) with TaskTimer(tl, 'Loading exp data from disk.'): + fileloader_exp = create_FileLoader( + self.exp_abs_pathfilename_list) # Create the list of field names that should get kept. - keep_fields = list(set( + keep_fields_exp = list(set( _conv_new2orig_field_names( CFG['dataset']['analysis_required_exp_field_names'] + self._loading_extra_exp_field_name_list + @@ -715,7 +719,7 @@ def _conv_new2orig_field_names(new_field_names, orig2new_renaming_dict): )) data_exp = fileloader_exp.load_data( - keep_fields=keep_fields, + keep_fields=keep_fields_exp, dtype_convertions=dtc_dict, dtype_convertion_except_fields=_conv_new2orig_field_names( dtc_except_fields, @@ -725,35 +729,41 @@ def _conv_new2orig_field_names(new_field_names, orig2new_renaming_dict): else: data_exp = None - # Load the monte-carlo data. 
- with TaskTimer(tl, 'Loading mc data from disk.'): - fileloader_mc = create_FileLoader(self.mc_abs_pathfilename_list) - keep_fields = list(set( - _conv_new2orig_field_names( - CFG['dataset']['analysis_required_exp_field_names'] + - self._loading_extra_exp_field_name_list + - keep_fields, - self._exp_field_name_renaming_dict) + - _conv_new2orig_field_names( - CFG['dataset']['analysis_required_mc_field_names'] + - self._loading_extra_mc_field_name_list + - keep_fields, - self._mc_field_name_renaming_dict) - )) - data_mc = fileloader_mc.load_data( - keep_fields=keep_fields, - dtype_convertions=dtc_dict, - dtype_convertion_except_fields=_conv_new2orig_field_names( - dtc_except_fields, - self._mc_field_name_renaming_dict), - efficiency_mode=efficiency_mode) - data_mc.rename_fields(self._mc_field_name_renaming_dict) + # Load the monte-carlo data if there is any. + if(len(self._mc_pathfilename_list) > 0): + with TaskTimer(tl, 'Loading mc data from disk.'): + fileloader_mc = create_FileLoader( + self.mc_abs_pathfilename_list) + # Determine `keep_fields_mc` for the generic case, where MC + # field names are an union of exp and mc field names. + # But the renaming dictionary can differ for exp and MC fields. + keep_fields_mc = list(set( + _conv_new2orig_field_names( + CFG['dataset']['analysis_required_exp_field_names'] + + self._loading_extra_exp_field_name_list + + keep_fields, + self._exp_field_name_renaming_dict) + + _conv_new2orig_field_names( + CFG['dataset']['analysis_required_exp_field_names'] + + self._loading_extra_exp_field_name_list + + CFG['dataset']['analysis_required_mc_field_names'] + + self._loading_extra_mc_field_name_list + + keep_fields, + self._mc_field_name_renaming_dict) + )) + data_mc = fileloader_mc.load_data( + keep_fields=keep_fields_mc, + dtype_convertions=dtc_dict, + dtype_convertion_except_fields=_conv_new2orig_field_names( + dtc_except_fields, + self._mc_field_name_renaming_dict), + efficiency_mode=efficiency_mode) + data_mc.rename_fields(self._mc_field_name_renaming_dict) + else: + data_mc = None if(livetime is None): livetime = self.livetime - if(livetime is None): - raise ValueError('No livetime was provided for dataset ' - '"%s"!'%(self.name)) data = DatasetData(data_exp, data_mc, livetime) @@ -947,13 +957,15 @@ def load_and_prepare_data( keep_fields ) data.exp.tidy_up(keep_fields=keep_fields_exp) - with TaskTimer(tl, 'Cleaning MC data.'): - keep_fields_mc = ( - CFG['dataset']['analysis_required_exp_field_names'] + - CFG['dataset']['analysis_required_mc_field_names'] + - keep_fields - ) - data.mc.tidy_up(keep_fields=keep_fields_mc) + + if(data.mc is not None): + with TaskTimer(tl, 'Cleaning MC data.'): + keep_fields_mc = ( + CFG['dataset']['analysis_required_exp_field_names'] + + CFG['dataset']['analysis_required_mc_field_names'] + + keep_fields + ) + data.mc.tidy_up(keep_fields=keep_fields_mc) with TaskTimer(tl, 'Asserting data format.'): assert_data_format(self, data) @@ -1361,8 +1373,11 @@ def get_dataset(self, name): collection. """ if(name not in self._datasets): + ds_names = '", "'.join(self.dataset_names) + ds_names = '"'+ds_names+'"' raise KeyError('The dataset "%s" is not part of the dataset ' - 'collection "%s"!'%(name, self.name)) + 'collection "%s"! Possible dataset names are: %s!'%( + name, self.name, ds_names)) return self._datasets[name] def get_datasets(self, names): @@ -1590,24 +1605,28 @@ def exp(self, data): @property def mc(self): """The DataFieldRecordArray instance holding the monte-carlo data. 
+ This is None, if there is no monte-carlo data available. """ return self._mc @mc.setter def mc(self, data): - if(not isinstance(data, DataFieldRecordArray)): - raise TypeError('The mc property must be an instance of ' - 'DataFieldRecordArray!') + if(data is not None): + if(not isinstance(data, DataFieldRecordArray)): + raise TypeError('The mc property must be an instance of ' + 'DataFieldRecordArray!') self._mc = data @property def livetime(self): """The integrated livetime in days of the data. + This is None, if there is no live-time provided. """ return self._livetime @livetime.setter def livetime(self, lt): - lt = float_cast(lt, - 'The livetime property must be castable to type float!') + if(lt is not None): + lt = float_cast(lt, + 'The livetime property must be castable to type float!') self._livetime = lt @property @@ -1651,13 +1670,20 @@ def _get_missing_keys(keys, required_keys): 'experimental data of dataset "%s": '%(dataset.name)+ ', '.join(missing_exp_keys)) - # Check monte-carlo data keys. - missing_mc_keys = _get_missing_keys( - data.mc.field_name_list, - CFG['dataset']['analysis_required_exp_field_names'] + - CFG['dataset']['analysis_required_mc_field_names']) - if(len(missing_mc_keys) != 0): - raise KeyError('The following data fields are missing for the monte-carlo data of dataset "%s": '%(dataset.name)+', '.join(missing_mc_keys)) + if(data.mc is not None): + # Check monte-carlo data keys. + missing_mc_keys = _get_missing_keys( + data.mc.field_name_list, + CFG['dataset']['analysis_required_exp_field_names'] + + CFG['dataset']['analysis_required_mc_field_names']) + if(len(missing_mc_keys) != 0): + raise KeyError('The following data fields are missing for the ' + 'monte-carlo data of dataset "%s": '%(dataset.name)+ + ', '.join(missing_mc_keys)) + + if(data.livetime is None): + raise ValueError('No livetime was specified for dataset "{}"!'.format( + dataset.name)) def remove_events(data_exp, mjds): diff --git a/skyllh/core/expectation_maximization.py b/skyllh/core/expectation_maximization.py new file mode 100644 index 0000000000..c82db849a3 --- /dev/null +++ b/skyllh/core/expectation_maximization.py @@ -0,0 +1,152 @@ +import numpy as np +from scipy.stats import norm + +from skyllh.core.analysis import TimeIntegratedMultiDatasetSingleSourceAnalysis +from skyllh.core.backgroundpdf import BackgroundUniformTimePDF +from skyllh.core.pdf import TimePDF +from skyllh.core.pdfratio import SigOverBkgPDFRatio +from skyllh.core.random import RandomStateService +from skyllh.core.signalpdf import ( + SignalBoxTimePDF, + SignalGaussTimePDF, +) + + +def expectation_em(ns, mu, sigma, t, sob): + """ + Expectation step of expectation maximization. + + Parameters + ---------- + ns : float | 1d ndarray of float + The number of signal neutrinos, as weight for the gaussian flare. + mu : float | 1d ndarray of float + The mean time of the gaussian flare. + sigma: float | 1d ndarray of float + Sigma of the gaussian flare. + t : 1d ndarray of float + Times of the events. + sob : 1d ndarray of float + The signal over background values of events, or weights of events + + Returns + ------- + expectation : list of 1d ndarray of float + Weighted "responsibility" function of each event to belong to the flare. + sum_log_denom : float + Sum of log of denominators. 
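+
+    Notes
+    -----
+    The background term assumes the events lie within a 10 degree band
+    around the source; ``b_term`` in the code below is the corresponding
+    solid-angle fraction.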
+ """ + ns = np.atleast_1d(ns) + mu = np.atleast_1d(mu) + sigma = np.atleast_1d(sigma) + + b_term = (1 - np.cos(10 / 180 * np.pi)) / 2 + N = len(t) + e_sig = [] + for i in range(len(ns)): + e_sig.append(norm(loc=mu[i], scale=sigma[i]).pdf(t) * sob * ns[i]) + e_bg = (N - np.sum(ns)) / (np.max(t) - np.min(t)) / b_term + denom = sum(e_sig) + e_bg + + return [e / denom for e in e_sig], np.sum(np.log(denom)) + + +def maximization_em(e_sig, t): + """ + Maximization step of expectation maximization. + + Parameters + ---------- + e_sig : list of 1d ndarray of float + The weights for each event from the expectation step. + t : 1d ndarray of float + The times of each event. + + Returns + ------- + mu : list of float + Best fit mean time of the gaussian flare. + sigma : list of float + Best fit sigma of the gaussian flare. + ns : list of float + Best fit number of signal neutrinos, as weight for the gaussian flare. + """ + mu = [] + sigma = [] + ns = [] + for i in range(len(e_sig)): + mu.append(np.average(t, weights=e_sig[i])) + sigma.append(np.sqrt(np.average(np.square(t - mu[i]), weights=e_sig[i]))) + ns.append(np.sum(e_sig[i])) + sigma = [max(1, s) for s in sigma] + + return mu, sigma, ns + + +def em_fit(x, weights, n=1, tol=1.e-200, iter_max=500, weight_thresh=0, initial_width=5000, + remove_x=None): + """Run expectation maximization. + + Parameters + ---------- + x : array[float] + Quantity to run EM on (e.g. the time if EM should find time flares) + weights : + weights for each event (e.g. the signal over background ratio) + fitparams : dict + Dictionary with value for gamma, e.g. {'gamma': 2}. + n : int + How many Gaussians flares we are looking for. + tol : float + the stopping criteria for expectation maximization. This is the difference in the normalized likelihood over the + last 20 iterations. + iter_max : int + The maximum number of iterations, even if stopping criteria tolerance (`tol`) is not yet reached. + weight_thresh : float + Set a minimum threshold for event weights. Events with smaller weights will be removed. + initial_width : float + Starting width for the gaussian flare in days. + remove_x : float | None + Specific x of event that should be removed. 
+
+    Returns
+    -------
+    mu : list of float
+        The mean of each Gaussian flare.
+    sigma : list of float
+        The width of each Gaussian flare.
+    ns : list of float
+        The normalization factor of each Gaussian flare.
+    """
+    if weight_thresh > 0:
+        # Remove events whose weight is below the threshold.
+        mask = weights > weight_thresh
+        weights = weights[mask]
+        x = x[mask]
+
+    # In case, remove a specific event.
+    if remove_x is not None:
+        mask = x == remove_x
+        weights = weights[~mask]
+        x = x[~mask]
+
+    # Expectation maximization.
+    mu = np.linspace(x[0], x[-1], n+2)[1:-1]
+    sigma = np.ones(n) * initial_width
+    ns = np.ones(n) * 10
+    llh_diff = 100
+    llh_old = 0
+    llh_diff_list = [100] * 20
+
+    iteration = 0
+
+    # Run until convergence or the maximum number of iterations is reached.
+    while iteration < iter_max and llh_diff > tol:
+        iteration += 1
+
+        e, logllh = expectation_em(ns, mu, sigma, x, weights)
+
+        llh_new = np.sum(logllh)
+        tmp_diff = np.abs(llh_old - llh_new) / llh_new
+        llh_diff_list = llh_diff_list[:-1]
+        llh_diff_list.insert(0, tmp_diff)
+        llh_diff = np.max(llh_diff_list)
+        llh_old = llh_new
+        mu, sigma, ns = maximization_em(e, x)
+
+    return mu, sigma, ns
diff --git a/skyllh/core/minimizer.py b/skyllh/core/minimizer.py
index 1677d5d00a..cf28c3d68e 100644
--- a/skyllh/core/minimizer.py
+++ b/skyllh/core/minimizer.py
@@ -927,8 +927,9 @@ def minimize(self, rss, fitparamset, func, args=None, kwargs=None):
         (fmin, grads) = func(xmin, *args)
 
         logger.debug(
-            '%s (%s): Minimized function: %d iterations, %d repetitions' % (
+            '%s (%s): Minimized function: %d iterations, %d repetitions, '
+            'xmin=%s' % (
                 classname(self), classname(self._minimizer_impl),
-                self._minimizer_impl.get_niter(status), reps))
+                self._minimizer_impl.get_niter(status), reps, str(xmin)))
 
         return (xmin, fmin, status)
diff --git a/skyllh/core/pdf.py b/skyllh/core/pdf.py
index b48ef8e6ad..de5bc4cfae 100644
--- a/skyllh/core/pdf.py
+++ b/skyllh/core/pdf.py
@@ -400,9 +400,10 @@ def assert_is_valid_for_trial_data(self, tdm):
         The method must raise a ValueError if the PDF is not valid for the
         given trial data.
         """
-        raise NotImplementedError('The derived PDF class "%s" did not '
-            'implement the "assert_is_valid_for_trial_data" method!' % (
-            classname(self)))
+        raise NotImplementedError(
+            'The derived PDF class "%s" did not implement the '
+            '"assert_is_valid_for_trial_data" method!' % (
+                classname(self)))
 
     @abc.abstractmethod
     def get_prob(self, tdm, params=None, tl=None):
@@ -544,9 +545,17 @@ def get_prob(self, tdm, params=None, tl=None):
         pdf1 = self._pdf1
         pdf2 = self._pdf2
 
-        with TaskTimer(tl, 'Get signal prob from table.'):
-            (prob1, grads1) = pdf1.get_prob(tdm, params, tl=tl)
-            (prob2, grads2) = pdf2.get_prob(tdm, params, tl=tl)
+        with TaskTimer(tl, 'Get prob from individual PDFs.'):
+            p1 = pdf1.get_prob(tdm, params, tl=tl)
+            if isinstance(p1, tuple):
+                (prob1, grads1) = p1
+            else:
+                prob1 = p1
+            p2 = pdf2.get_prob(tdm, params, tl=tl)
+            if isinstance(p2, tuple):
+                (prob2, grads2) = p2
+            else:
+                prob2 = p2
 
         prob = prob1 * prob2
 
@@ -1387,18 +1396,55 @@ def pdf_keys(self):
 
     @property
     def pdf_axes(self):
+        """DEPRECATED (read-only) The PDFAxes object of one of the PDFs of this
+        PDF set.
+        All PDFs of this set are supposed to have the same axes.
+        """
+        return self.axes
+
+    @property
+    def axes(self):
         """(read-only) The PDFAxes object of one of the PDFs of this PDF set.
         All PDFs of this set are supposed to have the same axes.
         """
         key = next(iter(self._gridfitparams_hash_pdf_dict.keys()))
         return self._gridfitparams_hash_pdf_dict[key].axes
 
+    def __getitem__(self, k):
+        """Returns the PDF for the given PDF key.
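+
+        See the ``make_pdf_key`` method below for how such a key is
+        constructed from the grid fit parameter values.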
+ """ + return self._gridfitparams_hash_pdf_dict[k] + def items(self): """Returns the list of 2-element tuples for the PDF stored in this PDFSet object. """ return self._gridfitparams_hash_pdf_dict.items() + def make_pdf_key(self, gridfitparams): + """Creates the PDF key for the given grid fit parameter values. + + Parameters + ---------- + gridfitparams : dict | int + The dictionary with the grid fit parameters for which the PDF key + should get made. If an integer is given, it is assumed to be + the PDF key. + + Returns + ------- + pdf_key : int + The hash that represents the key for the PDF with the given grid + fit parameter values. + """ + if(isinstance(gridfitparams, int)): + return gridfitparams + if(isinstance(gridfitparams, dict)): + return make_params_hash(gridfitparams) + + raise TypeError( + 'The gridfitparams argument must be of type dict or int!') + def add_pdf(self, pdf, gridfitparams): """Adds the given PDF object for the given parameters to the internal registry. If this PDF set is not empty, the to-be-added PDF must have @@ -1425,10 +1471,9 @@ def add_pdf(self, pdf, gridfitparams): if(not isinstance(pdf, self.pdf_type)): raise TypeError('The pdf argument must be an instance of %s!' % ( typename(self.pdf_type))) - if(not isinstance(gridfitparams, dict)): - raise TypeError('The fitparams argument must be of type dict!') - gridfitparams_hash = make_params_hash(gridfitparams) + gridfitparams_hash = self.make_pdf_key(gridfitparams) + if(gridfitparams_hash in self._gridfitparams_hash_pdf_dict): raise KeyError('The PDF with grid fit parameters %s was already ' 'added!' % (str(gridfitparams))) @@ -1468,17 +1513,12 @@ def get_pdf(self, gridfitparams): KeyError If no PDF object was created for the given set of parameters. """ - if(isinstance(gridfitparams, int)): - gridfitparams_hash = gridfitparams - elif(isinstance(gridfitparams, dict)): - gridfitparams_hash = make_params_hash(gridfitparams) - else: - raise TypeError( - 'The gridfitparams argument must be of type dict or int!') + gridfitparams_hash = self.make_pdf_key(gridfitparams) if(gridfitparams_hash not in self._gridfitparams_hash_pdf_dict): raise KeyError( - 'No PDF was created for the parameter set "%s"!' % (str(gridfitparams))) + 'No PDF was created for the parameter set "%s"!' % + (str(gridfitparams))) pdf = self._gridfitparams_hash_pdf_dict[gridfitparams_hash] return pdf diff --git a/skyllh/core/pdfratio.py b/skyllh/core/pdfratio.py index 7932ccf499..cfe4de4cca 100644 --- a/skyllh/core/pdfratio.py +++ b/skyllh/core/pdfratio.py @@ -23,7 +23,8 @@ PDFSet, IsBackgroundPDF, IsSignalPDF, - SpatialPDF + SpatialPDF, + TimePDF ) from skyllh.core.timing import TaskTimer diff --git a/skyllh/core/py.py b/skyllh/core/py.py index c9c623148a..64f179b62c 100644 --- a/skyllh/core/py.py +++ b/skyllh/core/py.py @@ -84,6 +84,11 @@ def classname(obj): """ return typename(type(obj)) +def module_classname(obj): + """Returns the module and class name of the class instance ``obj``. + """ + return '{}.{}'.format(obj.__module__, classname(obj)) + def get_byte_size_prefix(size): """Determines the biggest size prefix for the given size in bytes such that the new size is still greater one. 
diff --git a/skyllh/core/scrambling.py b/skyllh/core/scrambling.py
index 1ce4c58095..ab5c3454e0 100644
--- a/skyllh/core/scrambling.py
+++ b/skyllh/core/scrambling.py
@@ -37,13 +37,13 @@ def scramble(self, rss, data):
 
 
 class UniformRAScramblingMethod(DataScramblingMethod):
-    """The UniformRAScramblingMethod method performs right-ascention scrambling
+    r"""The UniformRAScramblingMethod method performs right-ascension scrambling
     uniformly within a given RA range. By default it's (0, 2\pi).
 
     Note: This alters only the ``ra`` values of the data!
     """
     def __init__(self, ra_range=None):
-        """Initializes a new RAScramblingMethod instance.
+        r"""Initializes a new RAScramblingMethod instance.
 
         Parameters
         ----------
diff --git a/skyllh/core/signal_generator.py b/skyllh/core/signal_generator.py
index d64fa750dd..f65d4d39b2 100644
--- a/skyllh/core/signal_generator.py
+++ b/skyllh/core/signal_generator.py
@@ -151,6 +151,8 @@ def __init__(self, src_hypo_group_manager, dataset_list, data_list,
         data_list : list of DatasetData instances
             The list of DatasetData instances holding the actual data of each
             dataset. The order must match the order of ``dataset_list``.
+        kwargs
+            A typical keyword argument is the instance of MultiDatasetTCLLHRatio.
         """
         super().__init__(
             *args,
@@ -376,7 +378,8 @@ def generate_signal_events(self, rss, mean, poisson=True):
 class MultiSourceSignalGenerator(SignalGenerator):
     """More optimal signal generator for multiple sources.
     """
-    def __init__(self, src_hypo_group_manager, dataset_list, data_list):
+    def __init__(self, src_hypo_group_manager, dataset_list, data_list,
+                 **kwargs):
         """Constructs a new signal generator instance.
 
         Parameters
@@ -390,9 +393,11 @@ def __init__(self, src_hypo_group_manager, dataset_list, data_list):
         data_list : list of DatasetData instances
             The list of DatasetData instances holding the actual data of each
             dataset. The order must match the order of ``dataset_list``.
+        kwargs
+            A typical keyword argument is the instance of MultiDatasetTCLLHRatio.
         """
         super(MultiSourceSignalGenerator, self).__init__(
-            src_hypo_group_manager, dataset_list, data_list)
+            src_hypo_group_manager, dataset_list, data_list, **kwargs)
 
     def _construct_signal_candidates(self):
         """Constructs an array holding pointer information of signal candidate
diff --git a/skyllh/core/signalpdf.py b/skyllh/core/signalpdf.py
index a0911173aa..749cfd6937 100644
--- a/skyllh/core/signalpdf.py
+++ b/skyllh/core/signalpdf.py
@@ -117,7 +117,6 @@ def get_prob(self, tdm, fitparams=None, tl=None):
         try:
             # angular difference is pre calculated
             prob = get_data('spatial_pdf_gauss')
-            src_ra = get_data('src_array')['ra']
 
             if src_ev_idxs is None:
                 prob = prob.reshape((len(get_data('src_array')), len(ra)))
@@ -188,6 +187,95 @@ def get_prob(self, tdm, fitparams=None, tl=None):
         return (prob_res, np.atleast_2d(grads))
 
 
+class RayleighPSFPointSourceSignalSpatialPDF(SpatialPDF, IsSignalPDF):
+    r"""This spatial signal PDF model describes the spatial PDF for a point-like
+    source following a Rayleigh distribution in the opening angle between the
+    source and reconstructed muon direction.
+    Mathematically, it's the convolution of a point in the sky, i.e. the source
+    location, with the PSF. The result of this convolution has the following
+    form:
+
+        1/(2*\pi \sin \Psi) * \Psi/\sigma^2 \exp(-\Psi^2/(2*\sigma^2)),
+
+    where \sigma is the spatial uncertainty of the event and \Psi the distance
+    on the sphere between the source and the data event.
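+
+    For small \Psi, \sin\Psi \approx \Psi, so this reduces to the familiar
+    two-dimensional gaussian PSF, 1/(2\pi\sigma^2) \exp(-\Psi^2/(2\sigma^2)).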
+
+    This PDF requires the `src_array` data field, which is a numpy record
+    ndarray with the data fields `ra` and `dec` holding the right-ascension
+    and declination of the point-like sources, respectively.
+    """
+    def __init__(self, ra_range=None, dec_range=None, **kwargs):
+        """Creates a new spatial signal PDF for point-like sources with a
+        Rayleigh point-spread-function (PSF).
+
+        Parameters
+        ----------
+        ra_range : 2-element tuple | None
+            The range in right-ascension this spatial PDF is valid for.
+            If set to None, the range (0, 2pi) is used.
+        dec_range : 2-element tuple | None
+            The range in declination this spatial PDF is valid for.
+            If set to None, the range (-pi/2, +pi/2) is used.
+        """
+        if(ra_range is None):
+            ra_range = (0, 2*np.pi)
+        if(dec_range is None):
+            dec_range = (-np.pi/2, np.pi/2)
+
+        super().__init__(
+            ra_range=ra_range,
+            dec_range=dec_range,
+            **kwargs
+        )
+
+    def get_prob(self, tdm, fitparams=None, tl=None):
+        """Calculates the spatial signal probability density of each event for
+        the defined source.
+
+        Parameters
+        ----------
+        tdm : instance of TrialDataManager
+            The TrialDataManager instance holding the trial event data for which
+            to calculate the PDF values. The following data fields need to be
+            present:
+
+            'psi' : float
+                The opening angle in radian between the source direction and the
+                reconstructed muon direction.
+            'ang_err': float
+                The reconstruction uncertainty in radian of the data event.
+
+        fitparams : None
+            Unused interface argument.
+        tl : TimeLord instance | None
+            The optional TimeLord instance to use for measuring timing
+            information.
+
+        Returns
+        -------
+        pd : (N_events,)-shaped numpy ndarray
+            The 1D numpy ndarray with the probability density for each event in
+            unit 1/rad.
+        grads : (0,)-shaped 1D numpy ndarray
+            Since this PDF does not depend on fit parameters, an empty array
+            is returned.
+        """
+        get_data = tdm.get_data
+
+        psi = get_data('psi')
+        sigma = get_data('ang_err')
+
+        # Rayleigh distribution in psi, divided by the 2*pi*sin(psi)
+        # phase-space factor of the opening angle.
+        pd = (
+            0.5/(np.pi*np.sin(psi)) *
+            (psi / sigma**2) *
+            np.exp(-0.5*(psi/sigma)**2)
+        )
+
+        grads = np.array([], dtype=np.double)
+
+        return (pd, grads)
+
+
 class SignalTimePDF(TimePDF, IsSignalPDF):
     """This class provides a time PDF class for a signal source. It consists
     of
@@ -311,8 +399,8 @@ def assert_is_valid_for_exp_data(self, data_exp):
                     time_axis.vmin, time_axis.vmax))
 
     def get_prob(self, tdm, fitparams):
-        """Calculates the signal time probability of each event for the given
-        set of signal time fit parameter values.
+        """Calculates the signal time probability density of each event for the
+        given set of signal time fit parameter values.
 
         Parameters
         ----------
@@ -325,12 +413,12 @@ def get_prob(self, tdm, fitparams):
                 The MJD time of the event.
         fitparams : dict
             The dictionary holding the signal time parameter values for which
-            the signal time probability should be calculated.
+            the signal time probability density should be calculated.
 
         Returns
         -------
-        prob : array of float
-            The (N,)-shaped ndarray holding the probability for each event.
+        pd : (N_events,)-shaped numpy ndarray
+            The 1D numpy ndarray with the probability density for each event.
         """
         # Update the time-profile if its fit-parameter values have changed and
         # recalculate self._I and self._S if an updated was actually performed.
@@ -338,20 +426,230 @@ def get_prob(self, tdm, fitparams): if(updated): (self._I, self._S) = self._calculate_time_profile_I_and_S() - events_time = tdm.get_data('time') + time = tdm.get_data('time') # Get a mask of the event times which fall inside a detector on-time # interval. - on = self._livetime.is_on(events_time) + on = self._livetime.is_on(time) # The sum of the on-time integrals of the time profile, A, will be zero # if the time profile is entirly during detector off-time. - prob = np.zeros((tdm.n_selected_events,), dtype=np.float64) + pd = np.zeros((tdm.n_selected_events,), dtype=np.float64) if(self._S > 0): - prob[on] = self._time_profile.get_value( - events_time[on]) / (self._I * self._S) + pd[on] = self._time_profile.get_value( + time[on]) / (self._I * self._S) + + return pd + + +class SignalGaussTimePDF(TimePDF, IsSignalPDF): + + def __init__(self, grl, mu, sigma, **kwargs): + """Creates a new signal time PDF instance for a given time profile of + the source. + + Parameters + ---------- + grl : ndarray + Array of the detector good run list + mu : float + Mean of the gaussian flare. + sigma : float + Sigma of the gaussian flare. + """ + super(SignalGaussTimePDF, self).__init__(**kwargs) + self.mu = mu + self.sigma = sigma + self.grl = grl + + + def norm_uptime(self): + """Compute the normalization with the dataset uptime. Distributions like + scipy.stats.norm are normalized (-inf, inf). + These must be re-normalized such that the function sums to 1 over the + finite good run list domain. + + Returns + ------- + norm : float + Normalization such that cdf sums to 1 over good run list domain + """ + cdf = scp.stats.norm(self.mu, self.sigma).cdf + + integral = (cdf(self.grl["stop"]) - cdf(self.grl["start"])).sum() + + if np.isclose(integral, 0): + return 0 + + return 1. / integral + + + def get_prob(self, tdm, fitparams=None, tl=None): + """Calculates the signal time probability density of each event for the + given set of signal time fit parameter values. + + Parameters + ---------- + tdm : instance of TrialDataManager + The instance of TrialDataManager holding the trial event data for + which to calculate the PDF value. The following data fields must + exist: + + - 'time' : float + The MJD time of the event. + + fitparams : None + Unused interface argument. + tl : TimeLord instance | None + The optional TimeLord instance to use for measuring timing + information. + + Returns + ------- + pd : (N_events,)-shaped numpy ndarray + The 1D numpy ndarray with the probability density for each event. + grads : empty array of float + Empty, since it does not depend on any fit parameter + """ + time = tdm.get_data('time') + + pd = scp.stats.norm.pdf(time, self.mu, self.sigma) * self.norm_uptime() + grads = np.array([], dtype=np.double) + + return (pd, grads) + + +class SignalBoxTimePDF(TimePDF, IsSignalPDF): + + def __init__(self, grl, start, end, **kwargs): + """Creates a new signal time PDF instance for a given time profile of + the source. + + Parameters + ---------- + grl : ndarray + Array of the detector good run list + start : float + Start time of box profile. + end : float + End time of box profile. + """ + super(SignalBoxTimePDF, self).__init__(**kwargs) + self.start = start + self.end = end + self.grl = grl + + + def cdf(self, t): + """Compute the cumulative density function for the box pdf. This is needed for normalization. 
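+
+        The CDF is that of a uniform distribution on [start, end], clipped to
+        the time range covered by the good run list (t_start, t_end below):
+
+            cdf(t) = 0                                   for t <= t_start,
+                     (t - t_start) / (t_end - t_start)   for t_start <= t <= t_end,
+                     1                                   for t_end <= t.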
+
+        Parameters
+        ----------
+        t : float, ndarray
+            MJD times
+
+        Returns
+        -------
+        cdf : float, ndarray
+            Values of cumulative density function evaluated at t
+        """
+        t_start = self.start
+        t_end = self.end
+        t = np.atleast_1d(t)
+
+        cdf = np.zeros(t.size, float)
+        sample_start = self.grl["start"][0]
+        sample_end = self.grl["stop"][-1]
+
+        # Clip the box to the time range covered by the good run list.
+        if t_start < sample_start and t_end > sample_start:
+            t_start = sample_start
+        if t_end > sample_end and t_start < sample_end:
+            t_end = sample_end
+
+        # values between start and stop times
+        mask = (t_start <= t) & (t <= t_end)
+        cdf[mask] = (t[mask] - t_start) / (t_end - t_start)
+
+        # take care of values beyond stop time in sample
+        if t_end > sample_start:
+            mask = (t_end < t)
+            cdf[mask] = 1.
+
+        return cdf
+
+
+    def norm_uptime(self):
+        """Compute the normalization with the dataset uptime. Distributions
+        like scipy.stats.norm are normalized over (-inf, inf).
+        These must be re-normalized such that the function sums to 1 over the
+        finite good run list domain.
+
+        Returns
+        -------
+        norm : float
+            Normalization such that cdf sums to 1 over good run list domain
+        """
+        integral = (self.cdf(self.grl["stop"]) - self.cdf(self.grl["start"])).sum()
+
+        if np.isclose(integral, 0):
+            return 0
+
+        return 1. / integral
+
+
+    def get_prob(self, tdm, fitparams=None, tl=None):
+        """Calculates the signal time probability density of each event for the
+        given set of signal time fit parameter values.
+
+        Parameters
+        ----------
+        tdm : instance of TrialDataManager
+            The instance of TrialDataManager holding the trial event data for
+            which to calculate the PDF value. The following data fields must
+            exist:
+
+            - 'time' : float
+                The MJD time of the event.
+
+        fitparams : None
+            Unused interface argument.
+        tl : TimeLord instance | None
+            The optional TimeLord instance to use for measuring timing
+            information.
+
+        Returns
+        -------
+        pd : (N_events,)-shaped numpy ndarray
+            The 1D numpy ndarray with the probability density for each event.
+        grads : empty array of float
+            Empty, since the PDF does not depend on any fit parameter.
+        """
+        time = tdm.get_data('time')
+
+        # Get a mask of the event times which fall inside the box:
+        # 0 for outside the flare and 1 for inside the flare.
+        box_mask = np.piecewise(
+            time, [(self.start <= time) & (time <= self.end)], [1.])
+
+        sample_start = self.grl["start"][0]
+        sample_end = self.grl["stop"][-1]
+
+        t_start = self.start
+        t_end = self.end
+        # check if the whole flare lies in this dataset for normalization.
+        # If one part lies outside, adjust to datasample start or end time.
+        # For the case where everything lies outside, the pdf will be 0 by definition.
+ if t_start < sample_start and t_end > sample_start: + t_start = sample_start + if t_end > sample_end and t_start < sample_end: + t_end = sample_end + + pd = box_mask / (t_end - t_start) * self.norm_uptime() + grads = np.array([], dtype=np.double) + + return (pd, grads) + - return prob class SignalMultiDimGridPDF(MultiDimGridPDF, IsSignalPDF): diff --git a/skyllh/core/storage.py b/skyllh/core/storage.py index 1179304b4e..b05a85c164 100644 --- a/skyllh/core/storage.py +++ b/skyllh/core/storage.py @@ -81,7 +81,8 @@ def create_FileLoader(pathfilenames, **kwargs): cls = _FILE_LOADER_REG[fmt] return cls(pathfilenames, **kwargs) - raise RuntimeError('No FileLoader class is suitable to load the data file "%s"!'%(pathfilenames[0])) + raise RuntimeError('No FileLoader class is suitable to load the data file ' + '"%s"!'%(pathfilenames[0])) def assert_file_exists(pathfilename): """Checks if the given file exists and raises a RuntimeError if it does @@ -92,7 +93,8 @@ def assert_file_exists(pathfilename): class FileLoader(object, metaclass=abc.ABCMeta): - + """Abstract base class for a FileLoader class. + """ def __init__(self, pathfilenames, **kwargs): """Initializes a new FileLoader instance. @@ -408,6 +410,224 @@ def load_data(self, **kwargs): return data +class TextFileLoader(FileLoader): + """The TextFileLoader class provides the data loading functionality for + data text files where values are stored in a comma, or whitespace, separated + format. It uses the numpy.loadtxt function to load the data. It reads the + first line of the text file for a table header. + """ + def __init__(self, pathfilenames, header_comment='#', header_separator=None, + **kwargs): + """Creates a new file loader instance for a text data file. + + Parameters + ---------- + pathfilenames : str | sequence of str + The sequence of fully qualified file names of the data files that + need to be loaded. + header_comment : str + The character that defines a comment line in the text file. + header_separator : str | None + The separator of the header field names. If None, it assumes + whitespaces. + """ + super().__init__(pathfilenames, **kwargs) + + self.header_comment = header_comment + self.header_separator = header_separator + + @property + def header_comment(self): + """The character that defines a comment line in the text file. + """ + return self._header_comment + @header_comment.setter + def header_comment(self, s): + if(not isinstance(s, str)): + raise TypeError('The header_comment property must be of type str!') + self._header_comment = s + + @property + def header_separator(self): + """The separator of the header field names. If None, it assumes + whitespaces. + """ + return self._header_separator + @header_separator.setter + def header_separator(self, s): + if(s is not None): + if(not isinstance(s, str)): + raise TypeError('The header_separator property must be None or ' + 'of type str!') + self._header_separator = s + + def _extract_column_names(self, line): + """Tries to extract the column names of the data table based on the + given line. + + Parameters + ---------- + line : str + The text line containing the column names. + + Returns + ------- + names : list of str | None + The column names. + It returns None, if the column names cannot be extracted. + """ + # Remove possible new-line character and leading white-spaces. + line = line.strip() + # Check if the line is a comment line. + if(line[0:len(self._header_comment)] != self._header_comment): + return None + # Remove the leading comment character(s). 
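+        # (str.strip removes any of the given characters from both ends of
+        # the line, not just a single leading occurrence.)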
+ line = line.strip(self._header_comment) + # Remove possible leading whitespace characters. + line = line.strip() + # Split the line into the column names. + names = line.split(self._header_separator) + # Remove possible whitespaces of column names. + names = [ n.strip() for n in names ] + + if(len(names) == 0): + return None + + return names + + def _load_file(self, pathfilename, keep_fields, dtype_convertions, + dtype_convertion_except_fields): + """Loads the given file. + + Parameters + ---------- + pathfilename : str + The fully qualified file name of the data file that + need to be loaded. + keep_fields : str | sequence of str | None + Load the data into memory only for these data fields. If set to + ``None``, all in-file-present data fields are loaded into memory. + dtype_convertions : dict | None + If not None, this dictionary defines how data fields of specific + data types get converted into the specified data types. + This can be used to use less memory. + dtype_convertion_except_fields : str | sequence of str | None + The sequence of field names whose data type should not get + converted. + + Returns + ------- + data : DataFieldRecordArray instance + The DataFieldRecordArray instance holding the loaded data. + """ + assert_file_exists(pathfilename) + + with open(pathfilename, 'r') as ifile: + line = ifile.readline() + column_names = self._extract_column_names(line) + if(column_names is None): + raise ValueError('The data text file "{}" does not contain a ' + 'readable table header as first line!'.format(pathfilename)) + usecols = None + dtype = [(n,np.float64) for n in column_names] + if(keep_fields is not None): + # Select only the given columns. + usecols = [] + dtype = [] + for (idx,name) in enumerate(column_names): + if(name in keep_fields): + usecols.append(idx) + dtype.append((name,np.float64)) + usecols = tuple(usecols) + if(len(dtype) == 0): + raise ValueError('No data columns were selected to be loaded!') + + data_ndarray = np.loadtxt(ifile, + dtype=dtype, + comments=self._header_comment, + usecols=usecols) + + data = DataFieldRecordArray( + data_ndarray, + keep_fields=keep_fields, + dtype_convertions=dtype_convertions, + dtype_convertion_except_fields=dtype_convertion_except_fields, + copy=False) + + return data + + def load_data(self, keep_fields=None, dtype_convertions=None, + dtype_convertion_except_fields=None, **kwargs): + """Loads the data from the data files specified through their fully + qualified file names. + + Parameters + ---------- + keep_fields : str | sequence of str | None + Load the data into memory only for these data fields. If set to + ``None``, all in-file-present data fields are loaded into memory. + dtype_convertions : dict | None + If not None, this dictionary defines how data fields of specific + data types get converted into the specified data types. + This can be used to use less memory. + dtype_convertion_except_fields : str | sequence of str | None + The sequence of field names whose data type should not get + converted. + + Returns + ------- + data : DataFieldRecordArray + The DataFieldRecordArray holding the loaded data. + + Raises + ------ + RuntimeError + If a file does not exist. + ValueError + If the table header cannot be read. 
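+
+        Examples
+        --------
+        A minimal sketch, assuming a whitespace-separated text file with a
+        commented header line (the file name and column names below are
+        hypothetical):
+
+        >>> loader = TextFileLoader(['events/IC40_exp.csv'])
+        >>> data = loader.load_data(keep_fields=['MJD[days]', 'RA[deg]'])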
+ """ + if(keep_fields is not None): + if(isinstance(keep_fields, str)): + keep_fields = [ keep_fields ] + elif(not issequenceof(keep_fields, str)): + raise TypeError('The keep_fields argument must be None, an ' + 'instance of type str, or a sequence of instances of ' + 'type str!') + + if(dtype_convertions is None): + dtype_convertions = dict() + elif(not isinstance(dtype_convertions, dict)): + raise TypeError('The dtype_convertions argument must be None, ' + 'or an instance of dict!') + + if(dtype_convertion_except_fields is None): + dtype_convertion_except_fields = [] + elif(isinstance(dtype_convertion_except_fields, str)): + dtype_convertion_except_fields = [ dtype_convertion_except_fields ] + elif(not issequenceof(dtype_convertion_except_fields, str)): + raise TypeError('The dtype_convertion_except_fields argument ' + 'must be a sequence of str instances.') + + # Load the first data file. + data = self._load_file( + self._pathfilename_list[0], + keep_fields=keep_fields, + dtype_convertions=dtype_convertions, + dtype_convertion_except_fields=dtype_convertion_except_fields + ) + + # Load possible subsequent data files by appending to the first data. + for i in range(1, len(self._pathfilename_list)): + data.append(self._load_file( + self._pathfilename_list[i], + keep_fields=keep_fields, + dtype_convertions=dtype_convertions, + dtype_convertion_except_fields=dtype_convertion_except_fields + )) + + return data + + class DataFieldRecordArray(object): """The DataFieldRecordArray class provides a data container similar to a numpy record ndarray. But the data fields are stored as individual numpy ndarray @@ -977,3 +1197,4 @@ def sort_by_field(self, name): register_FileLoader(['.npy'], NPYFileLoader) register_FileLoader(['.pkl'], PKLFileLoader) +register_FileLoader(['.csv'], TextFileLoader) diff --git a/skyllh/datasets/__init__.py b/skyllh/datasets/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/skyllh/datasets/i3/PublicData_10y_ps.py b/skyllh/datasets/i3/PublicData_10y_ps.py new file mode 100644 index 0000000000..6a14723fe8 --- /dev/null +++ b/skyllh/datasets/i3/PublicData_10y_ps.py @@ -0,0 +1,558 @@ +# -*- coding: utf-8 -*- +# Author: Dr. Martin Wolf + +import numpy as np + +from skyllh.core.dataset import DatasetCollection +from skyllh.i3.dataset import I3Dataset + + +def create_dataset_collection(base_path=None, sub_path_fmt=None): + """Defines the dataset collection for IceCube's 10-year + point-source public data, which is available at + http://icecube.wisc.edu/data-releases/20210126_PS-IC40-IC86_VII.zip + + Parameters + ---------- + base_path : str | None + The base path of the data files. The actual path of a data file is + assumed to be of the structure //. + If None, use the default path CFG['repository']['base_path']. + sub_path_fmt : str | None + The sub path format of the data files of the public data sample. + If None, use the default sub path format + 'icecube_10year_ps'. + + Returns + ------- + dsc : DatasetCollection + The dataset collection containing all the seasons as individual + I3Dataset objects. + """ + # Define the version of the data sample (collection). + (version, verqualifiers) = (1, dict(p=0)) + + # Define the default sub path format. + default_sub_path_fmt = 'icecube_10year_ps' + + # We create a dataset collection that will hold the individual seasonal + # public data datasets (all of the same version!). 
+    dsc = DatasetCollection('Public Data 10-year point-source')
+
+    dsc.description = """
+    The events contained in this release correspond to the IceCube's
+    time-integrated point source search with 10 years of data [2]. Please refer
+    to the description of the sample and known changes in the text at [1].
+
+    The data contained in this release of IceCube’s point source sample shows
+    evidence of a cumulative excess of events from four sources (NGC 1068,
+    TXS 0506+056, PKS 1424+240, and GB6 J1542+6129) from a catalogue of 110
+    potential sources. NGC 1068 gives the largest excess and is coincidentally
+    the hottest spot in the full Northern sky search [1].
+
+    Data from IC86-2012 through IC86-2014 used in [2] use an updated selection
+    and reconstruction compared to the 7 year time-integrated search [3] and the
+    detection of the 2014-2015 neutrino flare from the direction of
+    TXS 0506+056 [4]. The 7 year and 10 year versions of the sample show
+    overlaps of between 80 and 90%.
+
+    An a posteriori cross check of the updated sample has been performed on
+    TXS 0506+056 showing two previously-significant cascade-like events removed
+    in the newer sample. These two events occur near the blazar's position
+    during the TXS flare and give large reconstructed energies, but are likely
+    not well-modeled by the track-like reconstructions included in this
+    selection. While the events are unlikely to be track-like, their
+    contribution to previous results has been handled properly.
+
+    While the significance of the 2014-2015 TXS 0506+056 flare has decreased
+    from p=7.0e-5 to 8.1e-3, the change is a result of changes to the sample
+    and not of increased data. No problems have been identified with the
+    previously published results and since we have no reason a priori to
+    prefer the new sample over the old sample, these results do not supersede
+    those in [4].
+
+    This release contains data beginning in 2008 (IC40) until the spring of 2018
+    (IC86-2017). This release duplicates and supplants previously released data
+    from 2012 and earlier. Events from this release cannot be combined with any
+    other releases.
+
+    -----------------------------------------
+    # Experimental data events
+    -----------------------------------------
+    The "events" folder contains the events observed in the 10 year sample of
+    IceCube's point source neutrino selection. Each file corresponds to a single
+    season of IceCube datataking, including roughly one year of data. For each
+    event, reconstructed particle information is included.
+
+    - MJD: The MJD time (ut1) of the event interaction given to 1e-8 days,
+    corresponding to roughly millisecond precision.
+
+    - log10(E/GeV): The reconstructed energy of a muon passing through the
+    detector. The reconstruction follows the prescription for unfolding given
+    in Section 8 of [5].
+
+    - AngErr[deg]: The estimated angular uncertainty on the reconstructed
+    direction given in degrees. The angular uncertainty is assumed to be
+    symmetric in azimuth and zenith and is used to calculate the signal spatial
+    probabilities for each event following the procedure given in [6]. The
+    errors are calibrated using simulated events so that they provide correct
+    coverage for an E^{-2} power law flux. This sample assumes a lower limit on
+    the estimated angular uncertainty of 0.2 degrees.
+
+    - RA[deg], Dec[deg]: The right ascension and declination (J2000)
+    corresponding to the particle's reconstructed origin. Given in degrees.
+
+    - Azimuth[deg], Zenith[deg]: The local coordinates of the particle's
+    reconstructed origin.
+
+    The local coordinates may be necessary when searching for transient
+    phenomena on timescales shorter than 1 day due to non-uniformity in the
+    detector's response as a function of azimuth. In these cases, we recommend
+    scrambling events in time, then using the local coordinates and time to
+    calculate new RA and Dec values.
+
+    Note that during the preparation of this data release, one duplicated event
+    was discovered in the IC86-2015 season. This event has not contributed to
+    any significant excesses.
+
+    -----------------------------------------
+    # Detector uptime
+    -----------------------------------------
+    In order to properly account for detector uptime, IceCube maintains
+    "good run lists". These contain information about "good runs", periods of
+    datataking useful for analysis. Data may be marked unusable for various
+    reasons, including major construction or upgrade work, calibration runs, or
+    other anomalies. The "uptime" folder contains lists of the good runs for
+    each season.
+
+    - MJD_start[days], MJD_stop[days]: The start and end times for each good
+    run.
+
+    -----------------------------------------
+    # Instrument response functions
+    -----------------------------------------
+    In order to best model the response of the IceCube detector to a given
+    signal, Monte Carlo simulations are produced for each detector
+    configuration. Events are sampled from these simulations to model the
+    response of point sources from an arbitrary source and spectrum.
+
+    We provide several binned responses for the detector in the "irfs" folder
+    of this data release.
+
+    ------------------
+    # Effective Areas
+    ------------------
+    The effective area is a property of the detector and selection which, when
+    convolved with a flux model, gives the expected rate of events in the
+    detector. Here we release the muon neutrino effective areas for each season
+    of data.
+
+    The effective areas are averaged over bins using simulated muon neutrino
+    events ranging from 100 GeV to 100 PeV. Because the response varies widely
+    in both energy and declination, we provide the tabulated response in these
+    two dimensions. Due to IceCube's unique position at the south pole, the
+    effective area is uniform in right ascension for timescales longer than
+    1 day. It varies by about 10% as a function of azimuth, an effect which may
+    be important for shorter timescales. While the azimuthal effective areas are
+    not included here, they are included in IceCube's internal analyses.
+    These may be made available upon request.
+
+    Tabulated versions of the effective area are included in csv files in the
+    "irfs" folder. Plotted versions are included as pdf files in the same
+    location. Because the detector configuration and selection were unchanged
+    after the IC86-2012 season, the effective area for this season should be
+    used for IC86-2012 through IC86-2017.
+
+    - log10(E_nu/GeV)_min, log10(E_nu/GeV)_max: The minimum and maximum of the
+    energy bin used to calculate the average effective area. Note that this
+    uses the neutrino's true energy and not the reconstructed muon energy.
+
+    - Dec_nu_min[deg], Dec_nu_max[deg]: The minimum and maximum of the
+    declination of the neutrino origin. Again, note that this is the true
+    direction of the neutrino and not the reconstructed muon direction.
+
+    - A_Eff[cm^2]: The average effective area across a bin.
+
+    ------------------
+    # Smearing Matrices
+    ------------------
+    IceCube has a nontrivial smearing matrix with correlations between the
+    directional uncertainty, the point spread function, and the reconstructed
+    muon energy. To provide the most complete set of information, we include
+    tables of these responses for each season from IC40 through IC86-2012.
+    Seasons after IC86-2012 reuse that season's response functions.
+
+    The included smearing matrices take the form of 5D tables mapping a
+    (E_nu, Dec_nu) bin in effective area to a 3D matrix of (E, PSF, AngErr).
+    The contents of each 3D matrix bin give the fractional count of simulated
+    events within the bin relative to all events in the (E_nu, Dec_nu) bin.
+
+    Fractional_Counts = [Events in (E_nu, Dec_nu, E, PSF, AngErr)] /
+        [Events in (E_nu, Dec_nu)]
+
+    The simulation statistics, while large enough for direct sampling, are
+    limited when producing these tables, ranging from just 621,858 simulated
+    events for IC40 to 11,595,414 simulated events for IC86-2012. In order to
+    reduce statistical uncertainties in each 5D bin, bins are selected in each
+    (E_nu, Dec_nu) bin independently. The bin edges are given in the smearing
+    matrix files. All locations not given have a Fractional_Counts of 0.
+
+    - log10(E_nu/GeV)_min, log10(E_nu/GeV)_max: The minimum and maximum of the
+    energy bin used to calculate the average effective area. Note that this
+    uses the neutrino's true energy and not the reconstructed muon energy.
+
+    - Dec_nu_min[deg], Dec_nu_max[deg]: The minimum and maximum of the
+    declination of the neutrino origin. Again, note that this is the true
+    direction of the neutrino and not the reconstructed muon direction.
+
+    - log10(E/GeV): The reconstructed energy of a muon passing through the
+    detector. The reconstruction follows the prescription for unfolding given
+    in Section 8 of [5].
+
+    - PSF_min[deg], PSF_max[deg]: The minimum and maximum of the true angle
+    between the neutrino origin and the reconstructed muon direction.
+
+    - AngErr_min[deg], AngErr_max[deg]: The estimated angular uncertainty on the
+    reconstructed direction given in degrees. The angular uncertainty is assumed
+    to be symmetric in azimuth and zenith and is used to calculate the signal
+    spatial probabilities for each event following the procedure given in [6].
+    The errors are calibrated so that they provide correct coverage for an
+    E^{-2} power law flux. This sample assumes a lower limit on the estimated
+    angular uncertainty of 0.2 degrees.
+
+    - Fractional_Counts: The fraction of simulated events falling within each
+    5D bin relative to all events in the (E_nu, Dec_nu) bin.
+
+    -----------------------------------------
+    # References
+    -----------------------------------------
+    [1] IceCube Data for Neutrino Point-Source Searches: Years 2008-2018,
+        [ArXiv link](https://arxiv.org/abs/2101.09836)
+    [2] Time-integrated Neutrino Source Searches with 10 years of IceCube Data,
+        Phys. Rev. Lett. 124, 051103 (2020)
+    [3] All-sky search for time-integrated neutrino emission from astrophysical
+        sources with 7 years of IceCube data,
+        Astrophys. J., 835 (2017) no. 2, 151
+    [4] Neutrino emission from the direction of the blazar TXS 0506+056 prior to
+        the IceCube-170922A alert,
+        Science 361, 147-151 (2018)
+    [5] Energy Reconstruction Methods in the IceCube Neutrino Telescope,
+        JINST 9 (2014), P03009
+    [6] Methods for point source analysis in high energy neutrino telescopes,
+        Astropart. Phys. 29:299-305, 2008
+
+    -----------------------------------------
+    # Last Update
+    -----------------------------------------
+    28 January 2021
+    """
+
+    # Define the common keyword arguments for all data sets.
+    ds_kwargs = dict(
+        livetime = None,
+        version = version,
+        verqualifiers = verqualifiers,
+        base_path = base_path,
+        default_sub_path_fmt = default_sub_path_fmt,
+        sub_path_fmt = sub_path_fmt
+    )
+
+    grl_field_name_renaming_dict = {
+        'MJD_start[days]': 'start',
+        'MJD_stop[days]': 'stop'
+    }
+
+    # Define the datasets for the different seasons.
+    # For the declination and energy binning we use the same binning as was
+    # used in the original point-source analysis using the PointSourceTracks
+    # dataset.
+
+    # ---------- IC40 ----------------------------------------------------------
+    IC40 = I3Dataset(
+        name = 'IC40',
+        exp_pathfilenames = 'events/IC40_exp.csv',
+        mc_pathfilenames = None,
+        grl_pathfilenames = 'uptime/IC40_exp.csv',
+        **ds_kwargs
+    )
+    IC40.grl_field_name_renaming_dict = grl_field_name_renaming_dict
+    IC40.add_aux_data_definition(
+        'eff_area_datafile', 'irfs/IC40_effectiveArea.csv')
+    IC40.add_aux_data_definition(
+        'smearing_datafile', 'irfs/IC40_smearing.csv')
+
+    sin_dec_bins = np.unique(np.concatenate([
+        np.linspace(-1., -0.25, 10 + 1),
+        np.linspace(-0.25, 0.0, 10 + 1),
+        np.linspace(0.0, 1., 10 + 1),
+    ]))
+    IC40.define_binning('sin_dec', sin_dec_bins)
+
+    energy_bins = np.arange(2., 9.5 + 0.01, 0.125)
+    IC40.define_binning('log_energy', energy_bins)
+
+    # ---------- IC59 ----------------------------------------------------------
+    IC59 = I3Dataset(
+        name = 'IC59',
+        exp_pathfilenames = 'events/IC59_exp.csv',
+        mc_pathfilenames = None,
+        grl_pathfilenames = 'uptime/IC59_exp.csv',
+        **ds_kwargs
+    )
+    IC59.grl_field_name_renaming_dict = grl_field_name_renaming_dict
+    IC59.add_aux_data_definition(
+        'eff_area_datafile', 'irfs/IC59_effectiveArea.csv')
+    IC59.add_aux_data_definition(
+        'smearing_datafile', 'irfs/IC59_smearing.csv')
+
+    sin_dec_bins = np.unique(np.concatenate([
+        np.linspace(-1., -0.95, 2 + 1),
+        np.linspace(-0.95, -0.25, 25 + 1),
+        np.linspace(-0.25, 0.05, 15 + 1),
+        np.linspace(0.05, 1., 10 + 1),
+    ]))
+    IC59.define_binning('sin_dec', sin_dec_bins)
+
+    energy_bins = np.arange(2., 9.5 + 0.01, 0.125)
+    IC59.define_binning('log_energy', energy_bins)
+
+    # ---------- IC79 ----------------------------------------------------------
+    IC79 = I3Dataset(
+        name = 'IC79',
+        exp_pathfilenames = 'events/IC79_exp.csv',
+        mc_pathfilenames = None,
+        grl_pathfilenames = 'uptime/IC79_exp.csv',
+        **ds_kwargs
+    )
+    IC79.grl_field_name_renaming_dict = grl_field_name_renaming_dict
+    IC79.add_aux_data_definition(
+        'eff_area_datafile', 'irfs/IC79_effectiveArea.csv')
+    IC79.add_aux_data_definition(
+        'smearing_datafile', 'irfs/IC79_smearing.csv')
+
+    sin_dec_bins = np.unique(np.concatenate([
+        np.linspace(-1., -0.75, 10 + 1),
+        np.linspace(-0.75, 0., 15 + 1),
+        np.linspace(0., 1., 20 + 1)
+    ]))
+    IC79.define_binning('sin_dec', sin_dec_bins)
+
+    energy_bins = np.arange(2., 9.5 + 0.01, 0.125)
+    IC79.define_binning('log_energy', energy_bins)
+
+    # ---------- IC86-I --------------------------------------------------------
+    IC86_I = I3Dataset(
+ name = 'IC86_I', + exp_pathfilenames = 'events/IC86_I_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC86_I_exp.csv', + **ds_kwargs + ) + IC86_I.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_I.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_I_effectiveArea.csv') + IC86_I.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_I_smearing.csv') + + b = np.sin(np.radians(-5.)) # North/South transition boundary. + sin_dec_bins = np.unique(np.concatenate([ + np.linspace(-1., -0.2, 10 + 1), + np.linspace(-0.2, b, 4 + 1), + np.linspace(b, 0.2, 5 + 1), + np.linspace(0.2, 1., 10), + ])) + IC86_I.define_binning('sin_dec', sin_dec_bins) + + energy_bins = np.arange(1., 10.5 + 0.01, 0.125) + IC86_I.define_binning('log_energy', energy_bins) + + # ---------- IC86-II ------------------------------------------------------- + IC86_II = I3Dataset( + name = 'IC86_II', + exp_pathfilenames = 'events/IC86_II_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC86_II_exp.csv', + **ds_kwargs + ) + IC86_II.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_II.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_II.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + + sin_dec_bins = np.unique(np.concatenate([ + np.linspace(-1., -0.93, 4 + 1), + np.linspace(-0.93, -0.3, 10 + 1), + np.linspace(-0.3, 0.05, 9 + 1), + np.linspace(0.05, 1., 18 + 1), + ])) + IC86_II.define_binning('sin_dec', sin_dec_bins) + + energy_bins = np.arange(0.5, 9.5 + 0.01, 0.125) + IC86_II.define_binning('log_energy', energy_bins) + + # ---------- IC86-III ------------------------------------------------------ + IC86_III = I3Dataset( + name = 'IC86_III', + exp_pathfilenames = 'events/IC86_III_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC86_III_exp.csv', + **ds_kwargs + ) + IC86_III.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_III.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_III.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + + IC86_III.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_III.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + # ---------- IC86-IV ------------------------------------------------------- + IC86_IV = I3Dataset( + name = 'IC86_IV', + exp_pathfilenames = 'events/IC86_IV_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC86_IV_exp.csv', + **ds_kwargs + ) + IC86_IV.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_IV.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_IV.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + + IC86_IV.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_IV.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + # ---------- IC86-V -------------------------------------------------------- + IC86_V = I3Dataset( + name = 'IC86_V', + exp_pathfilenames = 'events/IC86_V_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC86_V_exp.csv', + **ds_kwargs + ) + IC86_V.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_V.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_V.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + + 
IC86_V.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_V.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + # ---------- IC86-VI ------------------------------------------------------- + IC86_VI = I3Dataset( + name = 'IC86_VI', + exp_pathfilenames = 'events/IC86_VI_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC86_VI_exp.csv', + **ds_kwargs + ) + IC86_VI.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_VI.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_VI.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + + IC86_VI.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_VI.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + # ---------- IC86-VII ------------------------------------------------------ + IC86_VII = I3Dataset( + name = 'IC86_VII', + exp_pathfilenames = 'events/IC86_VII_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC86_VII_exp.csv', + **ds_kwargs + ) + IC86_VII.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_VII.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_VII.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + + IC86_VII.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_VII.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + # ---------- IC86-II-VII --------------------------------------------------- + ds_list = [ + IC86_II, + IC86_III, + IC86_IV, + IC86_V, + IC86_VI, + IC86_VII, + ] + IC86_II_VII = I3Dataset( + name = 'IC86_II-VII', + exp_pathfilenames = I3Dataset.get_combined_exp_pathfilenames(ds_list), + mc_pathfilenames = None, + grl_pathfilenames = I3Dataset.get_combined_grl_pathfilenames(ds_list), + **ds_kwargs + ) + IC86_II_VII.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_II_VII.add_aux_data_definition( + 'eff_area_datafile', + IC86_II.get_aux_data_definition('eff_area_datafile')) + + IC86_II_VII.add_aux_data_definition( + 'smearing_datafile', + IC86_II.get_aux_data_definition('smearing_datafile')) + + IC86_II_VII.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_II_VII.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + #--------------------------------------------------------------------------- + + dsc.add_datasets(( + IC40, + IC59, + IC79, + IC86_I, + IC86_II, + IC86_III, + IC86_IV, + IC86_V, + IC86_VI, + IC86_VII, + IC86_II_VII + )) + + dsc.set_exp_field_name_renaming_dict({ + 'MJD[days]': 'time', + 'log10(E/GeV)': 'log_energy', + 'AngErr[deg]': 'ang_err', + 'RA[deg]': 'ra', + 'Dec[deg]': 'dec', + 'Azimuth[deg]': 'azi', + 'Zenith[deg]': 'zen' + }) + + def add_run_number(data): + exp = data.exp + exp.append_field('run', np.repeat(0, len(exp))) + + def convert_deg2rad(data): + exp = data.exp + exp['ang_err'] = np.deg2rad(exp['ang_err']) + exp['ra'] = np.deg2rad(exp['ra']) + exp['dec'] = np.deg2rad(exp['dec']) + exp['azi'] = np.deg2rad(exp['azi']) + exp['zen'] = np.deg2rad(exp['zen']) + + dsc.add_data_preparation(add_run_number) + dsc.add_data_preparation(convert_deg2rad) + + return dsc diff --git a/skyllh/datasets/i3/PublicData_10y_ps_wMC.py b/skyllh/datasets/i3/PublicData_10y_ps_wMC.py new file mode 100644 index 0000000000..c7c40d1cde --- /dev/null +++ b/skyllh/datasets/i3/PublicData_10y_ps_wMC.py @@ -0,0 +1,571 @@ +# -*- coding: utf-8 
-*- +# Author: Dr. Martin Wolf + +import numpy as np + +from skyllh.core.dataset import DatasetCollection +from skyllh.i3.dataset import I3Dataset + + +def create_dataset_collection(base_path=None, sub_path_fmt=None): + """Defines the dataset collection for IceCube's 10-year + point-source public data, which is available at + http://icecube.wisc.edu/data-releases/20210126_PS-IC40-IC86_VII.zip + + Parameters + ---------- + base_path : str | None + The base path of the data files. The actual path of a data file is + assumed to be of the structure //. + If None, use the default path CFG['repository']['base_path']. + sub_path_fmt : str | None + The sub path format of the data files of the public data sample. + If None, use the default sub path format + 'icecube_10year_ps'. + + Returns + ------- + dsc : DatasetCollection + The dataset collection containing all the seasons as individual + I3Dataset objects. + """ + # Define the version of the data sample (collection). + (version, verqualifiers) = (1, dict(p=0)) + + # Define the default sub path format. + default_sub_path_fmt = 'icecube_10year_ps' + + # We create a dataset collection that will hold the individual seasonal + # public data datasets (all of the same version!). + dsc = DatasetCollection('Public Data 10-year point-source') + + dsc.description = """ + The events contained in this release correspond to the IceCube's + time-integrated point source search with 10 years of data [2]. Please refer + to the description of the sample and known changes in the text at [1]. + + The data contained in this release of IceCube’s point source sample shows + evidence of a cumulative excess of events from four sources (NGC 1068, + TXS 0506+056, PKS 1424+240, and GB6 J1542+6129) from a catalogue of 110 + potential sources. NGC 1068 gives the largest excess and is coincidentally + the hottest spot in the full Northern sky search [1]. + + Data from IC86-2012 through IC86-2014 used in [2] use an updated selection + and reconstruction compared to the 7 year time-integrated search [3] and the + detection of the 2014-2015 neutrino flare from the direction of + TXS 0506+056 [4]. The 7 year and 10 year versions of the sample show + overlaps of between 80 and 90%. + + An a posteriori cross check of the updated sample has been performed on + TXS 0506+056 showing two previously-significant cascade-like events removed + in the newer sample. These two events occur near the blazar's position + during the TXS flare and give large reconstructed energies, but are likely + not well-modeled by the track-like reconstructions included in this + selection. While the events are unlikely to be track-like, their + contribution to previous results has been handled properly. + + While the significance of the 2014-2015 TXS 0505+56 flare has decreased from + p=7.0e-5 to 8.1e-3, the change is a result of changes to the sample and not + of increased data. No problems have been identified with the previously + published results and since we have no reason a priori to prefer the new + sample over the old sample, these results do not supercede those in [4]. + + This release contains data beginning in 2008 (IC40) until the spring of 2018 + (IC86-2017). This release duplicates and supplants previously released data + from 2012 and earlier. 
Events from this release cannot be combined with any + other releases + + ----------------------------------------- + # Experimental data events + ----------------------------------------- + The "events" folder contains the events observed in the 10 year sample of + IceCube's point source neutrino selection. Each file corresponds to a single + season of IceCube datataking, including roughly one year of data. For each + event, reconstructed particle information is included. + + - MJD: The MJD time (ut1) of the event interaction given to 1e-8 days, + corresponding to roughly millisecond precision. + + - log10(E/GeV): The reconstructed energy of a muon passing through the + detector. The reconstruction follows the prescription for unfolding the + given in Section 8 of [5]. + + - AngErr[deg]: The estimated angular uncertainty on the reconstructed + direction given in degrees. The angular uncertainty is assumed to be + symmetric in azimuth and zenith and is used to calculate the signal spatial + probabilities for each event following the procedure given in [6]. The + errors are calibrated using simulated events so that they provide correct + coverage for an E^{-2} power law flux. This sample assumes a lower limit on + the estimated angular uncertainty of 0.2 degrees. + + - RA[deg], Dec[deg]: The right ascension and declination (J2000) + corresponding to the particle's reconstructed origin. Given in degrees. + + - Azimuth[deg], Zenith[deg]: The local coordinates of the particle's + reconstructed origin. + + The local coordinates may be necessary when searching for transient + phenomena on timescales shorter than 1 day due to non-uniformity in the + detector's response as a function of azimuth. In these cases, we recommend + scrambling events in time, then using the local coordinates and time to + calculate new RA and Dec values. + + Note that during the preparation of this data release, one duplicated event + was discovered in the IC86-2015 season. This event has not contributed to + any significant excesses. + + ----------------------------------------- + # Detector uptime + ----------------------------------------- + In order to properly account for detector uptime, IceCube maintains + "good run lists". These contain information about "good runs", periods of + datataking useful for analysis. Data may be marked unusable for various + reasons, including major construction or upgrade work, calibration runs, or + other anomalies. The "uptime" folder contains lists of the good runs for + each season. + + - MJD_start[days], MJD_stop[days]: The start and end times for each good run + + ----------------------------------------- + # Instrument response functions + ----------------------------------------- + In order to best model the response of the IceCube detector to a given + signal, Monte Carlo simulations are produced for each detector + configuration. Events are sampled from these simulations to model the + response of point sources from an arbitrary source and spectrum. + + We provide several binned responses for the detector in the "irfs" folder + of this data release. + + ------------------ + # Effective Areas + ------------------ + The effective area is a property of the detector and selection which, when + convolved with a flux model, gives the expected rate of events in the + detector. Here we release the muon neutrino effective areas for each season + of data. + + The effective areas are averaged over bins using simulated muon neutrino + events ranging from 100 GeV to 100 PeV. 
Because the response varies widely + in both energy and declination, we provide the tabulated response in these + two dimensions. Due to IceCube's unique position at the south pole, the + effective area is uniform in right ascension for timescales longer than + 1 day. It varies by about 10% as a function of azimuth, an effect which may + be important for shorter timescales. While the azimuthal effective areas are + not included here, they are included in IceCube's internal analyses. + These may be made available upon request. + + Tabulated versions of the effective area are included in csv files in the + "irfs" folder. Plotted versions are included as pdf files in the same + location. Because the detector configuration and selection were unchanged + after the IC86-2012 season, the effective area for this season should be + used for IC86-2012 through IC86-2017. + + - log10(E_nu/GeV)_min, log10(E_nu/GeV)_max: The minimum and maximum of the + energy bin used to caclulate the average effective area. Note that this uses + the neutrino's true energy and not the reconstructed muon energy. + + - Dec_nu_min[deg], Dec_nu_max[deg]: The minimum and maximum of the + declination of the neutrino origin. Again, note that this is the true + direction of the neutrino and not the reconstructed muon direction. + + - A_Eff[cm^2]: The average effective area across a bin. + + ------------------ + # Smearing Matrices + ------------------ + IceCube has a nontrivial smearing matrix with correlations between the + directional uncertainty, the point spread function, and the reconstructed + muon energy. To provide the most complete set of information, we include + tables of these responses for each season from IC40 through IC86-2012. + Seasons after IC86-2012 reuse that season's response functions. + + The included smearing matrices take the form of 5D tables mapping a + (E_nu, Dec_nu) bin in effective area to a 3D matrix of (E, PSF, AngErr). + The contents of each 3D matrix bin give the fractional count of simulated + events within the bin relative to all events in the (E_nu, Dec_nu) bin. + + Fractional_Counts = [Events in (E_nu, Dec_nu, E, PSF, AngErr)] / + [Events in (E_nu, Dec_nu)] + + The simulations statistics, while large enough for direct sampling, are + limited when producing these tables, ranging from just 621,858 simulated + events for IC40 to 11,595,414 simulated events for IC86-2012. In order to + reduce statistical uncertainties in each 5D bin, bins are selected in each + (E_nu, Dec_nu) bin independently. The bin edges are given in the smearing + matrix files. All locations not given have a Fractional_Counts of 0. + + - log10(E_nu/GeV)_min, log10(E_nu/GeV)_max: The minimum and maximum of the + energy bin used to caclulate the average effective area. Note that this uses + the neutrino's true energy and not the reconstructed muon energy. + + - Dec_nu_min[deg], Dec_nu_max[deg]: The minimum and maximum of the + declination of the neutrino origin. Again, note that this is the true + direction of the neutrino and not the reconstructed muon direction. + + - log10(E/GeV): The reconstructed energy of a muon passing through the + detector. The reconstruction follows the prescription for unfolding the + given in Section 8 of [5]. + + - PSF_min[deg], PSF_max[deg]: The minimum and maximum of the true angle + between the neutrino origin and the reconstructed muon direction. + + - AngErr_min[deg], AngErr_max[deg]: The estimated angular uncertainty on the + reconstructed direction given in degrees. 
The angular uncertainty is assumed + to be symmetric in azimuth and zenith and is used to calculate the signal + spatial probabilities for each event following the procedure given in [6]. + The errors are calibrated so that they provide correct coverage for an + E^{-2} power law flux. This sample assumes a lower limit on the estimated + angular uncertainty of 0.2 degrees. + + - Fractional_Counts: The fraction of simulated events falling within each + 5D bin relative to all events in the (E_nu, Dec_nu) bin. + + ----------------------------------------- + # References + ----------------------------------------- + [1] IceCube Data for Neutrino Point-Source Searches: Years 2008-2018, + [ArXiv link](https://arxiv.org/abs/2101.09836) + [2] Time-integrated Neutrino Source Searches with 10 years of IceCube Data, + Phys. Rev. Lett. 124, 051103 (2020) + [3] All-sky search for time-integrated neutrino emission from astrophysical + sources with 7 years of IceCube data, + Astrophys. J., 835 (2017) no. 2, 151 + [4] Neutrino emission from the direction of the blazar TXS 0506+056 prior to + the IceCube-170922A alert, + Science 361, 147-151 (2018) + [5] Energy Reconstruction Methods in the IceCube Neutrino Telescope, + JINST 9 (2014), P03009 + [6] Methods for point source analysis in high energy neutrino telescopes, + Astropart.Phys.29:299-305,2008 + + ----------------------------------------- + # Last Update + ----------------------------------------- + 28 January 2021 + """ + + # Define the common keyword arguments for all data sets. + ds_kwargs = dict( + livetime = None, + version = version, + verqualifiers = verqualifiers, + base_path = base_path, + default_sub_path_fmt = default_sub_path_fmt, + sub_path_fmt = sub_path_fmt + ) + + grl_field_name_renaming_dict = { + 'MJD_start[days]': 'start', + 'MJD_stop[days]': 'stop' + } + + # Define the datasets for the different seasons. + # For the declination and energy binning we use the same binning as was + # used in the original point-source analysis using the PointSourceTracks + # dataset. 
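+    # Binning definitions registered below via ``define_binning`` can be
+    # retrieved again via ``get_binning_definition``, e.g.
+    #
+    #     sin_dec_binning = IC40.get_binning_definition('sin_dec')
+    #
+    # This is how the IC86_III through IC86_VII datasets below reuse the
+    # binning of IC86_II.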
+ + # ---------- IC40 ---------------------------------------------------------- + IC40 = I3Dataset( + name = 'IC40', + exp_pathfilenames = 'events/IC40_exp.csv', + mc_pathfilenames = 'sim/IC40_MC.npy', + grl_pathfilenames = 'uptime/IC40_exp.csv', + **ds_kwargs + ) + IC40.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC40.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC40_effectiveArea.csv') + IC40.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC40_smearing.csv') + + sin_dec_bins = np.unique(np.concatenate([ + np.linspace(-1., -0.25, 10 + 1), + np.linspace(-0.25, 0.0, 10 + 1), + np.linspace(0.0, 1., 10 + 1), + ])) + IC40.define_binning('sin_dec', sin_dec_bins) + + energy_bins = np.arange(2., 9.5 + 0.01, 0.125) + IC40.define_binning('log_energy', energy_bins) + + # ---------- IC59 ---------------------------------------------------------- + IC59 = I3Dataset( + name = 'IC59', + exp_pathfilenames = 'events/IC59_exp.csv', + mc_pathfilenames = 'sim/IC59_MC.npy', + grl_pathfilenames = 'uptime/IC59_exp.csv', + **ds_kwargs + ) + IC59.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC59.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC59_effectiveArea.csv') + IC59.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC59_smearing.csv') + + sin_dec_bins = np.unique(np.concatenate([ + np.linspace(-1., -0.95, 2 + 1), + np.linspace(-0.95, -0.25, 25 + 1), + np.linspace(-0.25, 0.05, 15 + 1), + np.linspace(0.05, 1., 10 + 1), + ])) + IC59.define_binning('sin_dec', sin_dec_bins) + + energy_bins = np.arange(2., 9.5 + 0.01, 0.125) + IC59.define_binning('log_energy', energy_bins) + + # ---------- IC79 ---------------------------------------------------------- + IC79 = I3Dataset( + name = 'IC79', + exp_pathfilenames = 'events/IC79_exp.csv', + mc_pathfilenames = 'sim/IC79_MC.npy', + grl_pathfilenames = 'uptime/IC79_exp.csv', + **ds_kwargs + ) + IC79.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC79.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC79_effectiveArea.csv') + IC79.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC79_smearing.csv') + + sin_dec_bins = np.unique(np.concatenate([ + np.linspace(-1., -0.75, 10 + 1), + np.linspace(-0.75, 0., 15 + 1), + np.linspace(0., 1., 20 + 1) + ])) + IC79.define_binning('sin_dec', sin_dec_bins) + + energy_bins = np.arange(2., 9.5 + 0.01, 0.125) + IC79.define_binning('log_energy', energy_bins) + + # ---------- IC86-I -------------------------------------------------------- + IC86_I = I3Dataset( + name = 'IC86_I', + exp_pathfilenames = 'events/IC86_I_exp.csv', + mc_pathfilenames = 'sim/IC86_I_MC.npy', + grl_pathfilenames = 'uptime/IC86_I_exp.csv', + **ds_kwargs + ) + IC86_I.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_I.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_I_effectiveArea.csv') + IC86_I.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_I_smearing.csv') + + b = np.sin(np.radians(-5.)) # North/South transition boundary. 
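+    # Numerically b = sin(-5 deg) ~ -0.087; the finer binning around this
+    # value resolves the horizon region, where the sample transitions from
+    # the muon-dominated southern sky to the neutrino-dominated northern sky.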
+ sin_dec_bins = np.unique(np.concatenate([ + np.linspace(-1., -0.2, 10 + 1), + np.linspace(-0.2, b, 4 + 1), + np.linspace(b, 0.2, 5 + 1), + np.linspace(0.2, 1., 10), + ])) + IC86_I.define_binning('sin_dec', sin_dec_bins) + + energy_bins = np.arange(1., 10.5 + 0.01, 0.125) + IC86_I.define_binning('log_energy', energy_bins) + + # ---------- IC86-II ------------------------------------------------------- + IC86_II = I3Dataset( + name = 'IC86_II', + exp_pathfilenames = 'events/IC86_II_exp.csv', + mc_pathfilenames = 'sim/IC86_II-VII_MC.npy', + grl_pathfilenames = 'uptime/IC86_II_exp.csv', + **ds_kwargs + ) + IC86_II.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_II.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_II.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + + sin_dec_bins = np.unique(np.concatenate([ + np.linspace(-1., -0.93, 4 + 1), + np.linspace(-0.93, -0.3, 10 + 1), + np.linspace(-0.3, 0.05, 9 + 1), + np.linspace(0.05, 1., 18 + 1), + ])) + IC86_II.define_binning('sin_dec', sin_dec_bins) + + energy_bins = np.arange(0.5, 9.5 + 0.01, 0.125) + IC86_II.define_binning('log_energy', energy_bins) + + # ---------- IC86-III ------------------------------------------------------ + IC86_III = I3Dataset( + name = 'IC86_III', + exp_pathfilenames = 'events/IC86_III_exp.csv', + mc_pathfilenames = 'sim/IC86_II-VII_MC.npy', + grl_pathfilenames = 'uptime/IC86_III_exp.csv', + **ds_kwargs + ) + IC86_III.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_III.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_III.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + + IC86_III.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_III.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + # ---------- IC86-IV ------------------------------------------------------- + IC86_IV = I3Dataset( + name = 'IC86_IV', + exp_pathfilenames = 'events/IC86_IV_exp.csv', + mc_pathfilenames = 'sim/IC86_II-VII_MC.npy', + grl_pathfilenames = 'uptime/IC86_IV_exp.csv', + **ds_kwargs + ) + IC86_IV.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_IV.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_IV.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + + IC86_IV.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_IV.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + # ---------- IC86-V -------------------------------------------------------- + IC86_V = I3Dataset( + name = 'IC86_V', + exp_pathfilenames = 'events/IC86_V_exp.csv', + mc_pathfilenames = 'sim/IC86_II-VII_MC.npy', + grl_pathfilenames = 'uptime/IC86_V_exp.csv', + **ds_kwargs + ) + IC86_V.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_V.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_V.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + + IC86_V.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_V.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + # ---------- IC86-VI ------------------------------------------------------- + IC86_VI = I3Dataset( + name = 'IC86_VI', + exp_pathfilenames = 'events/IC86_VI_exp.csv', + mc_pathfilenames = 'sim/IC86_II-VII_MC.npy', + grl_pathfilenames = 
'uptime/IC86_VI_exp.csv', + **ds_kwargs + ) + IC86_VI.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_VI.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_VI.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + + IC86_VI.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_VI.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + # ---------- IC86-VII ------------------------------------------------------ + IC86_VII = I3Dataset( + name = 'IC86_VII', + exp_pathfilenames = 'events/IC86_VII_exp.csv', + mc_pathfilenames = 'sim/IC86_II-VII_MC.npy', + grl_pathfilenames = 'uptime/IC86_VII_exp.csv', + **ds_kwargs + ) + IC86_VII.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_VII.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_VII.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + + IC86_VII.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_VII.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + # ---------- IC86-II-VII --------------------------------------------------- + ds_list = [ + IC86_II, + IC86_III, + IC86_IV, + IC86_V, + IC86_VI, + IC86_VII, + ] + IC86_II_VII = I3Dataset( + name = 'IC86_II-VII', + exp_pathfilenames = I3Dataset.get_combined_exp_pathfilenames(ds_list), + mc_pathfilenames = IC86_II.mc_pathfilename_list, + grl_pathfilenames = I3Dataset.get_combined_grl_pathfilenames(ds_list), + **ds_kwargs + ) + IC86_II_VII.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_II_VII.add_aux_data_definition( + 'eff_area_datafile', + IC86_II.get_aux_data_definition('eff_area_datafile')) + + IC86_II_VII.add_aux_data_definition( + 'smearing_datafile', + IC86_II.get_aux_data_definition('smearing_datafile')) + + IC86_II_VII.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_II_VII.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + #--------------------------------------------------------------------------- + + dsc.add_datasets(( + IC40, + IC59, + IC79, + IC86_I, + IC86_II, + IC86_III, + IC86_IV, + IC86_V, + IC86_VI, + IC86_VII, + IC86_II_VII + )) + + dsc.set_exp_field_name_renaming_dict({ + 'MJD[days]': 'time', + 'log10(E/GeV)': 'log_energy', + 'AngErr[deg]': 'ang_err', + 'RA[deg]': 'ra', + 'Dec[deg]': 'dec', + 'Azimuth[deg]': 'azi', + 'Zenith[deg]': 'zen' + }) + + def add_run_number(data): + exp = data.exp + mc = data.mc + exp.append_field('run', np.repeat(0, len(exp))) + mc.append_field('run', np.repeat(0, len(mc))) + + def add_time(data): + mc = data.mc + mc.append_field('time', np.repeat(0, len(mc))) + + def add_azimuth_and_zenith(data): + mc = data.mc + mc.append_field('azi', np.repeat(0, len(mc))) + mc.append_field('zen', np.repeat(0, len(mc))) + + def convert_deg2rad(data): + exp = data.exp + exp['ang_err'] = np.deg2rad(exp['ang_err']) + exp['ra'] = np.deg2rad(exp['ra']) + exp['dec'] = np.deg2rad(exp['dec']) + exp['azi'] = np.deg2rad(exp['azi']) + exp['zen'] = np.deg2rad(exp['zen']) + + dsc.add_data_preparation(add_run_number) + dsc.add_data_preparation(add_time) + dsc.add_data_preparation(add_azimuth_and_zenith) + dsc.add_data_preparation(convert_deg2rad) + + return dsc diff --git a/skyllh/datasets/i3/PublicData_10y_ps_wMCEq.py b/skyllh/datasets/i3/PublicData_10y_ps_wMCEq.py new file mode 100644 index 0000000000..38e05ce111 --- /dev/null +++ 
b/skyllh/datasets/i3/PublicData_10y_ps_wMCEq.py @@ -0,0 +1,598 @@ +# -*- coding: utf-8 -*- +# Author: Dr. Martin Wolf + +import numpy as np + +from skyllh.core.dataset import DatasetCollection +from skyllh.i3.dataset import I3Dataset + + +def create_dataset_collection(base_path=None, sub_path_fmt=None): + """Defines the dataset collection for IceCube's 10-year + point-source public data, which is available at + http://icecube.wisc.edu/data-releases/20210126_PS-IC40-IC86_VII.zip + + Parameters + ---------- + base_path : str | None + The base path of the data files. The actual path of a data file is + assumed to be of the structure <base_path>/<sub_path>/<file_name>. + If None, use the default path CFG['repository']['base_path']. + sub_path_fmt : str | None + The sub path format of the data files of the public data sample. + If None, use the default sub path format + 'icecube_10year_ps'. + + Returns + ------- + dsc : DatasetCollection + The dataset collection containing all the seasons as individual + I3Dataset objects. + """ + # Define the version of the data sample (collection). + (version, verqualifiers) = (1, dict(p=0)) + + # Define the default sub path format. + default_sub_path_fmt = 'icecube_10year_ps' + + # We create a dataset collection that will hold the individual seasonal + # public data datasets (all of the same version!). + dsc = DatasetCollection('Public Data 10-year point-source') + + dsc.description = """ + The events contained in this release correspond to IceCube's + time-integrated point source search with 10 years of data [2]. Please refer + to the description of the sample and known changes in the text at [1]. + + The data contained in this release of IceCube's point source sample shows + evidence of a cumulative excess of events from four sources (NGC 1068, + TXS 0506+056, PKS 1424+240, and GB6 J1542+6129) from a catalogue of 110 + potential sources. NGC 1068 gives the largest excess and is coincidentally + the hottest spot in the full Northern sky search [1]. + + Data from IC86-2012 through IC86-2014 used in [2] use an updated selection + and reconstruction compared to the 7 year time-integrated search [3] and the + detection of the 2014-2015 neutrino flare from the direction of + TXS 0506+056 [4]. The 7 year and 10 year versions of the sample show + overlaps of between 80 and 90%. + + An a posteriori cross check of the updated sample has been performed on + TXS 0506+056 showing two previously-significant cascade-like events removed + in the newer sample. These two events occur near the blazar's position + during the TXS flare and give large reconstructed energies, but are likely + not well-modeled by the track-like reconstructions included in this + selection. While the events are unlikely to be track-like, their + contribution to previous results has been handled properly. + + While the significance of the 2014-2015 TXS 0506+056 flare has decreased + from p=7.0e-5 to 8.1e-3, the change is a result of changes to the sample and + not of increased data. No problems have been identified with the previously + published results and since we have no reason a priori to prefer the new + sample over the old sample, these results do not supersede those in [4]. + + This release contains data beginning in 2008 (IC40) until the spring of 2018 + (IC86-2017). This release duplicates and supplants previously released data + from 2012 and earlier.
Events from this release cannot be combined with any + other releases. + + ----------------------------------------- + # Experimental data events + ----------------------------------------- + The "events" folder contains the events observed in the 10 year sample of + IceCube's point source neutrino selection. Each file corresponds to a single + season of IceCube data-taking, including roughly one year of data. For each + event, reconstructed particle information is included. + + - MJD: The MJD time (UT1) of the event interaction given to 1e-8 days, + corresponding to roughly millisecond precision. + + - log10(E/GeV): The reconstructed energy of a muon passing through the + detector. The reconstruction follows the prescription for unfolding given + in Section 8 of [5]. + + - AngErr[deg]: The estimated angular uncertainty on the reconstructed + direction given in degrees. The angular uncertainty is assumed to be + symmetric in azimuth and zenith and is used to calculate the signal spatial + probabilities for each event following the procedure given in [6]. The + errors are calibrated using simulated events so that they provide correct + coverage for an E^{-2} power law flux. This sample assumes a lower limit on + the estimated angular uncertainty of 0.2 degrees. + + - RA[deg], Dec[deg]: The right ascension and declination (J2000) + corresponding to the particle's reconstructed origin. Given in degrees. + + - Azimuth[deg], Zenith[deg]: The local coordinates of the particle's + reconstructed origin. + + The local coordinates may be necessary when searching for transient + phenomena on timescales shorter than 1 day due to non-uniformity in the + detector's response as a function of azimuth. In these cases, we recommend + scrambling events in time, then using the local coordinates and time to + calculate new RA and Dec values. + + Note that during the preparation of this data release, one duplicated event + was discovered in the IC86-2015 season. This event has not contributed to + any significant excesses. + + ----------------------------------------- + # Detector uptime + ----------------------------------------- + In order to properly account for detector uptime, IceCube maintains + "good run lists". These contain information about "good runs", periods of + data-taking useful for analysis. Data may be marked unusable for various + reasons, including major construction or upgrade work, calibration runs, or + other anomalies. The "uptime" folder contains lists of the good runs for + each season. + + - MJD_start[days], MJD_stop[days]: The start and end times for each good run + + ----------------------------------------- + # Instrument response functions + ----------------------------------------- + In order to best model the response of the IceCube detector to a given + signal, Monte Carlo simulations are produced for each detector + configuration. Events are sampled from these simulations to model the + response of point sources from an arbitrary source and spectrum. + + We provide several binned responses for the detector in the "irfs" folder + of this data release. + + ------------------ + # Effective Areas + ------------------ + The effective area is a property of the detector and selection which, when + convolved with a flux model, gives the expected rate of events in the + detector. Here we release the muon neutrino effective areas for each season + of data. + + The effective areas are averaged over bins using simulated muon neutrino + events ranging from 100 GeV to 100 PeV.
Because the response varies widely + in both energy and declination, we provide the tabulated response in these + two dimensions. Due to IceCube's unique position at the South Pole, the + effective area is uniform in right ascension for timescales longer than + 1 day. It varies by about 10% as a function of azimuth, an effect which may + be important for shorter timescales. While the azimuthal effective areas are + not included here, they are included in IceCube's internal analyses. + These may be made available upon request. + + Tabulated versions of the effective area are included in csv files in the + "irfs" folder. Plotted versions are included as pdf files in the same + location. Because the detector configuration and selection were unchanged + after the IC86-2012 season, the effective area for this season should be + used for IC86-2012 through IC86-2017. + + - log10(E_nu/GeV)_min, log10(E_nu/GeV)_max: The minimum and maximum of the + energy bin used to calculate the average effective area. Note that this uses + the neutrino's true energy and not the reconstructed muon energy. + + - Dec_nu_min[deg], Dec_nu_max[deg]: The minimum and maximum of the + declination of the neutrino origin. Again, note that this is the true + direction of the neutrino and not the reconstructed muon direction. + + - A_Eff[cm^2]: The average effective area across a bin. + + ------------------ + # Smearing Matrices + ------------------ + IceCube has a nontrivial smearing matrix with correlations between the + directional uncertainty, the point spread function, and the reconstructed + muon energy. To provide the most complete set of information, we include + tables of these responses for each season from IC40 through IC86-2012. + Seasons after IC86-2012 reuse that season's response functions. + + The included smearing matrices take the form of 5D tables mapping a + (E_nu, Dec_nu) bin in effective area to a 3D matrix of (E, PSF, AngErr). + The contents of each 3D matrix bin give the fractional count of simulated + events within the bin relative to all events in the (E_nu, Dec_nu) bin. + + Fractional_Counts = [Events in (E_nu, Dec_nu, E, PSF, AngErr)] / + [Events in (E_nu, Dec_nu)] + + The simulation statistics, while large enough for direct sampling, are + limited when producing these tables, ranging from just 621,858 simulated + events for IC40 to 11,595,414 simulated events for IC86-2012. In order to + reduce statistical uncertainties in each 5D bin, the bin edges are selected + independently for each (E_nu, Dec_nu) bin. The bin edges are given in the + smearing matrix files. All locations not given have a Fractional_Counts of 0. + + - log10(E_nu/GeV)_min, log10(E_nu/GeV)_max: The minimum and maximum of the + energy bin used to calculate the average effective area. Note that this uses + the neutrino's true energy and not the reconstructed muon energy. + + - Dec_nu_min[deg], Dec_nu_max[deg]: The minimum and maximum of the + declination of the neutrino origin. Again, note that this is the true + direction of the neutrino and not the reconstructed muon direction. + + - log10(E/GeV): The reconstructed energy of a muon passing through the + detector. The reconstruction follows the prescription for unfolding given + in Section 8 of [5]. + + - PSF_min[deg], PSF_max[deg]: The minimum and maximum of the true angle + between the neutrino origin and the reconstructed muon direction. + + - AngErr_min[deg], AngErr_max[deg]: The estimated angular uncertainty on the + reconstructed direction given in degrees.
The angular uncertainty is assumed + to be symmetric in azimuth and zenith and is used to calculate the signal + spatial probabilities for each event following the procedure given in [6]. + The errors are calibrated so that they provide correct coverage for an + E^{-2} power law flux. This sample assumes a lower limit on the estimated + angular uncertainty of 0.2 degrees. + + - Fractional_Counts: The fraction of simulated events falling within each + 5D bin relative to all events in the (E_nu, Dec_nu) bin. + + ----------------------------------------- + # References + ----------------------------------------- + [1] IceCube Data for Neutrino Point-Source Searches: Years 2008-2018, + [ArXiv link](https://arxiv.org/abs/2101.09836) + [2] Time-integrated Neutrino Source Searches with 10 years of IceCube Data, + Phys. Rev. Lett. 124, 051103 (2020) + [3] All-sky search for time-integrated neutrino emission from astrophysical + sources with 7 years of IceCube data, + Astrophys. J., 835 (2017) no. 2, 151 + [4] Neutrino emission from the direction of the blazar TXS 0506+056 prior to + the IceCube-170922A alert, + Science 361, 147-151 (2018) + [5] Energy Reconstruction Methods in the IceCube Neutrino Telescope, + JINST 9 (2014), P03009 + [6] Methods for point source analysis in high energy neutrino telescopes, + Astropart.Phys.29:299-305,2008 + + ----------------------------------------- + # Last Update + ----------------------------------------- + 28 January 2021 + """ + + # Define the common keyword arguments for all data sets. + ds_kwargs = dict( + livetime = None, + version = version, + verqualifiers = verqualifiers, + base_path = base_path, + default_sub_path_fmt = default_sub_path_fmt, + sub_path_fmt = sub_path_fmt + ) + + grl_field_name_renaming_dict = { + 'MJD_start[days]': 'start', + 'MJD_stop[days]': 'stop' + } + + # Define the datasets for the different seasons. + # For the declination and energy binning we use the same binning as was + # used in the original point-source analysis using the PointSourceTracks + # dataset. 
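+    # In this MCEq-based variant no Monte Carlo files are attached
+    # (``mc_pathfilenames = None``); instead each season carries two extra
+    # auxiliary files, a pre-computed MCEq atmospheric flux and a background
+    # PDF. As with the other auxiliary files, their paths can be looked up
+    # by name, e.g.
+    #
+    #     mceq_file = IC40.get_aux_data_definition('mceq_flux_datafile')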
+ + # ---------- IC40 ---------------------------------------------------------- + IC40 = I3Dataset( + name = 'IC40', + exp_pathfilenames = 'events/IC40_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC40_exp.csv', + **ds_kwargs + ) + IC40.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC40.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC40_effectiveArea.csv') + IC40.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC40_smearing.csv') + IC40.add_aux_data_definition( + 'mceq_flux_datafile', 'fluxes/mceq_IC40.pkl') + IC40.add_aux_data_definition( + 'pdf_bkg_datafile', 'pdfs/pdf_bkg_log10emu_sindecmu_IC40.pkl') + + sin_dec_bins = np.unique(np.concatenate([ + np.linspace(-1., -0.25, 10 + 1), + np.linspace(-0.25, 0.0, 10 + 1), + np.linspace(0.0, 1., 10 + 1), + ])) + IC40.define_binning('sin_dec', sin_dec_bins) + + energy_bins = np.arange(2., 9.5 + 0.01, 0.125) + IC40.define_binning('log_energy', energy_bins) + + # ---------- IC59 ---------------------------------------------------------- + IC59 = I3Dataset( + name = 'IC59', + exp_pathfilenames = 'events/IC59_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC59_exp.csv', + **ds_kwargs + ) + IC59.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC59.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC59_effectiveArea.csv') + IC59.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC59_smearing.csv') + IC59.add_aux_data_definition( + 'mceq_flux_datafile', 'fluxes/mceq_IC59.pkl') + IC59.add_aux_data_definition( + 'pdf_bkg_datafile', 'pdfs/pdf_bkg_log10emu_sindecmu_IC59.pkl') + + sin_dec_bins = np.unique(np.concatenate([ + np.linspace(-1., -0.95, 2 + 1), + np.linspace(-0.95, -0.25, 25 + 1), + np.linspace(-0.25, 0.05, 15 + 1), + np.linspace(0.05, 1., 10 + 1), + ])) + IC59.define_binning('sin_dec', sin_dec_bins) + + energy_bins = np.arange(2., 9.5 + 0.01, 0.125) + IC59.define_binning('log_energy', energy_bins) + + # ---------- IC79 ---------------------------------------------------------- + IC79 = I3Dataset( + name = 'IC79', + exp_pathfilenames = 'events/IC79_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC79_exp.csv', + **ds_kwargs + ) + IC79.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC79.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC79_effectiveArea.csv') + IC79.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC79_smearing.csv') + IC79.add_aux_data_definition( + 'mceq_flux_datafile', 'fluxes/mceq_IC79.pkl') + IC79.add_aux_data_definition( + 'pdf_bkg_datafile', 'pdfs/pdf_bkg_log10emu_sindecmu_IC79.pkl') + + sin_dec_bins = np.unique(np.concatenate([ + np.linspace(-1., -0.75, 10 + 1), + np.linspace(-0.75, 0., 15 + 1), + np.linspace(0., 1., 20 + 1) + ])) + IC79.define_binning('sin_dec', sin_dec_bins) + + energy_bins = np.arange(2., 9.5 + 0.01, 0.125) + IC79.define_binning('log_energy', energy_bins) + + # ---------- IC86-I -------------------------------------------------------- + IC86_I = I3Dataset( + name = 'IC86_I', + exp_pathfilenames = 'events/IC86_I_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC86_I_exp.csv', + **ds_kwargs + ) + IC86_I.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_I.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_I_effectiveArea.csv') + IC86_I.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_I_smearing.csv') + IC86_I.add_aux_data_definition( + 'mceq_flux_datafile', 'fluxes/mceq_IC86_I.pkl') + IC86_I.add_aux_data_definition( + 
'pdf_bkg_datafile', 'pdfs/pdf_bkg_log10emu_sindecmu_IC86_I.pkl') + + b = np.sin(np.radians(-5.)) # North/South transition boundary. + sin_dec_bins = np.unique(np.concatenate([ + np.linspace(-1., -0.2, 10 + 1), + np.linspace(-0.2, b, 4 + 1), + np.linspace(b, 0.2, 5 + 1), + np.linspace(0.2, 1., 10), + ])) + IC86_I.define_binning('sin_dec', sin_dec_bins) + + energy_bins = np.arange(1., 10.5 + 0.01, 0.125) + IC86_I.define_binning('log_energy', energy_bins) + + # ---------- IC86-II ------------------------------------------------------- + IC86_II = I3Dataset( + name = 'IC86_II', + exp_pathfilenames = 'events/IC86_II_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC86_II_exp.csv', + **ds_kwargs + ) + IC86_II.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_II.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_II.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + IC86_II.add_aux_data_definition( + 'mceq_flux_datafile', 'fluxes/mceq_IC86_II.pkl') + IC86_II.add_aux_data_definition( + 'pdf_bkg_datafile', 'pdfs/pdf_bkg_log10emu_sindecmu_IC86_II.pkl') + + sin_dec_bins = np.unique(np.concatenate([ + np.linspace(-1., -0.93, 4 + 1), + np.linspace(-0.93, -0.3, 10 + 1), + np.linspace(-0.3, 0.05, 9 + 1), + np.linspace(0.05, 1., 18 + 1), + ])) + IC86_II.define_binning('sin_dec', sin_dec_bins) + + energy_bins = np.arange(0.5, 9.5 + 0.01, 0.125) + IC86_II.define_binning('log_energy', energy_bins) + + # ---------- IC86-III ------------------------------------------------------ + IC86_III = I3Dataset( + name = 'IC86_III', + exp_pathfilenames = 'events/IC86_III_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC86_III_exp.csv', + **ds_kwargs + ) + IC86_III.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_III.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_III.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + IC86_III.add_aux_data_definition( + 'mceq_flux_datafile', 'fluxes/mceq_IC86_II.pkl') + IC86_III.add_aux_data_definition( + 'pdf_bkg_datafile', 'pdfs/pdf_bkg_log10emu_sindecmu_IC86_III.pkl') + + IC86_III.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_III.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + # ---------- IC86-IV ------------------------------------------------------- + IC86_IV = I3Dataset( + name = 'IC86_IV', + exp_pathfilenames = 'events/IC86_IV_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC86_IV_exp.csv', + **ds_kwargs + ) + IC86_IV.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_IV.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_IV.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + IC86_IV.add_aux_data_definition( + 'mceq_flux_datafile', 'fluxes/mceq_IC86_II.pkl') + IC86_IV.add_aux_data_definition( + 'pdf_bkg_datafile', 'pdfs/pdf_bkg_log10emu_sindecmu_IC86_IV.pkl') + + IC86_IV.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_IV.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + # ---------- IC86-V -------------------------------------------------------- + IC86_V = I3Dataset( + name = 'IC86_V', + exp_pathfilenames = 'events/IC86_V_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC86_V_exp.csv', + **ds_kwargs + ) + IC86_V.grl_field_name_renaming_dict = 
grl_field_name_renaming_dict + IC86_V.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_V.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + IC86_V.add_aux_data_definition( + 'mceq_flux_datafile', 'fluxes/mceq_IC86_II.pkl') + IC86_V.add_aux_data_definition( + 'pdf_bkg_datafile', 'pdfs/pdf_bkg_log10emu_sindecmu_IC86_V.pkl') + + IC86_V.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_V.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + # ---------- IC86-VI ------------------------------------------------------- + IC86_VI = I3Dataset( + name = 'IC86_VI', + exp_pathfilenames = 'events/IC86_VI_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC86_VI_exp.csv', + **ds_kwargs + ) + IC86_VI.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_VI.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_VI.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + IC86_VI.add_aux_data_definition( + 'mceq_flux_datafile', 'fluxes/mceq_IC86_II.pkl') + IC86_VI.add_aux_data_definition( + 'pdf_bkg_datafile', 'pdfs/pdf_bkg_log10emu_sindecmu_IC86_VI.pkl') + + IC86_VI.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_VI.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + # ---------- IC86-VII ------------------------------------------------------ + IC86_VII = I3Dataset( + name = 'IC86_VII', + exp_pathfilenames = 'events/IC86_VII_exp.csv', + mc_pathfilenames = None, + grl_pathfilenames = 'uptime/IC86_VII_exp.csv', + **ds_kwargs + ) + IC86_VII.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_VII.add_aux_data_definition( + 'eff_area_datafile', 'irfs/IC86_II_effectiveArea.csv') + IC86_VII.add_aux_data_definition( + 'smearing_datafile', 'irfs/IC86_II_smearing.csv') + IC86_VII.add_aux_data_definition( + 'mceq_flux_datafile', 'fluxes/mceq_IC86_II.pkl') + IC86_VII.add_aux_data_definition( + 'pdf_bkg_datafile', 'pdfs/pdf_bkg_log10emu_sindecmu_IC86_VII.pkl') + + IC86_VII.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_VII.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + # ---------- IC86-II-VII --------------------------------------------------- + ds_list = [ + IC86_II, + IC86_III, + IC86_IV, + IC86_V, + IC86_VI, + IC86_VII, + ] + IC86_II_VII = I3Dataset( + name = 'IC86_II-VII', + exp_pathfilenames = I3Dataset.get_combined_exp_pathfilenames(ds_list), + mc_pathfilenames = None, + grl_pathfilenames = I3Dataset.get_combined_grl_pathfilenames(ds_list), + **ds_kwargs + ) + IC86_II_VII.grl_field_name_renaming_dict = grl_field_name_renaming_dict + IC86_II_VII.add_aux_data_definition( + 'eff_area_datafile', + IC86_II.get_aux_data_definition('eff_area_datafile')) + + IC86_II_VII.add_aux_data_definition( + 'smearing_datafile', + IC86_II.get_aux_data_definition('smearing_datafile')) + + IC86_II_VII.add_binning_definition( + IC86_II.get_binning_definition('sin_dec')) + IC86_II_VII.add_binning_definition( + IC86_II.get_binning_definition('log_energy')) + + #--------------------------------------------------------------------------- + + dsc.add_datasets(( + IC40, + IC59, + IC79, + IC86_I, + IC86_II, + IC86_III, + IC86_IV, + IC86_V, + IC86_VI, + IC86_VII, + IC86_II_VII + )) + + dsc.set_exp_field_name_renaming_dict({ + 'MJD[days]': 'time', + 'log10(E/GeV)': 'log_energy', + 'AngErr[deg]': 'ang_err', + 'RA[deg]': 
'ra', + 'Dec[deg]': 'dec', + 'Azimuth[deg]': 'azi', + 'Zenith[deg]': 'zen' + }) + + def add_run_number(data): + exp = data.exp + exp.append_field('run', np.repeat(0, len(exp))) + + def convert_deg2rad(data): + exp = data.exp + exp['ang_err'] = np.deg2rad(exp['ang_err']) + exp['ra'] = np.deg2rad(exp['ra']) + exp['dec'] = np.deg2rad(exp['dec']) + exp['azi'] = np.deg2rad(exp['azi']) + exp['zen'] = np.deg2rad(exp['zen']) + + dsc.add_data_preparation(add_run_number) + dsc.add_data_preparation(convert_deg2rad) + + return dsc diff --git a/skyllh/datasets/i3/__init__.py b/skyllh/datasets/i3/__init__.py new file mode 100644 index 0000000000..3b3cb2ea35 --- /dev/null +++ b/skyllh/datasets/i3/__init__.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- + +from skyllh.datasets.i3 import ( + PublicData_10y_ps, + PublicData_10y_ps_wMC, + PublicData_10y_ps_wMCEq, +) + +data_samples = { + 'PublicData_10y_ps': PublicData_10y_ps, + 'PublicData_10y_ps_wMC': PublicData_10y_ps_wMC, + 'PublicData_10y_ps_wMCEq': PublicData_10y_ps_wMCEq +} diff --git a/skyllh/i3/dataset.py b/skyllh/i3/dataset.py index cf56df291e..a2f625d7ba 100644 --- a/skyllh/i3/dataset.py +++ b/skyllh/i3/dataset.py @@ -4,11 +4,15 @@ import os.path from skyllh.core import display -from skyllh.core.py import issequenceof +from skyllh.core.py import ( + issequenceof, + module_classname +) from skyllh.core.dataset import ( Dataset, DatasetData ) +from skyllh.core.debugging import get_logger from skyllh.core.storage import ( DataFieldRecordArray, create_FileLoader @@ -19,6 +23,7 @@ # This will change the skyllh.core.config.CFG dictionary. from skyllh.i3 import config + class I3Dataset(Dataset): """The I3Dataset class is an IceCube specific Dataset class that adds IceCube specific properties to the Dataset class. These additional @@ -62,6 +67,8 @@ def __init__(self, grl_pathfilenames=None, *args, **kwargs): """ super(I3Dataset, self).__init__(*args, **kwargs) + self._logger = get_logger(module_classname(self)) + self.grl_pathfilename_list = grl_pathfilenames self.grl_field_name_renaming_dict = dict() @@ -146,8 +153,8 @@ def __str__(self): return s def load_grl(self, efficiency_mode=None, tl=None): - """Loads the good-run-list and returns a structured numpy ndarray with - the following data fields: + """Loads the good-run-list and returns a DataFieldRecordArray instance + which should contain the following data fields: run : int The run number. @@ -248,41 +255,22 @@ def load_data( # Load the good-run-list (GRL) data if it is provided for this dataset, # and calculate the livetime based on the GRL. data_grl = None - lt = self.livetime if(len(self._grl_pathfilename_list) > 0): data_grl = self.load_grl( efficiency_mode=efficiency_mode, tl=tl) - if('livetime' not in data_grl.field_name_list): - raise KeyError('The GRL file(s) "%s" has no data field named ' - '"livetime"!'%(','.join(self._grl_pathfilename_list))) - lt = np.sum(data_grl['livetime']) - - # Override the livetime if there is a user defined livetime. - if(livetime is not None): - lt = livetime # Load all the defined data. data = I3DatasetData( super(I3Dataset, self).load_data( keep_fields=keep_fields, - livetime=lt, + livetime=livetime, dtc_dict=dtc_dict, dtc_except_fields=dtc_except_fields, efficiency_mode=efficiency_mode, tl=tl), data_grl) - # Select only the experimental data which fits the good-run-list for - # this dataset. 
- if(data_grl is not None): - task = 'Selected only the experimental data that matches the GRL '\ - 'for dataset "%s".'%(self.name) - with TaskTimer(tl, task): - runs = np.unique(data_grl['run']) - mask = np.isin(data.exp['run'], runs) - data.exp = data.exp[mask] - return data def prepare_data(self, data, tl=None): @@ -309,15 +297,71 @@ def prepare_data(self, data, tl=None): super(I3Dataset, self).prepare_data(data, tl=tl) if(data.exp is not None): + # Append sin(dec) data field to the experimental data. task = 'Appending IceCube-specific data fields to exp data.' with TaskTimer(tl, task): - data.exp.append_field('sin_dec', np.sin(data.exp['dec'])) + if 'sin_dec' not in data.exp.field_name_list: + data.exp.append_field( + 'sin_dec', np.sin(data.exp['dec'])) - # Append sin(dec) and sin(true_dec) to the MC data. - task = 'Appending IceCube-specific data fields to MC data.' - with TaskTimer(tl, task): - data.mc.append_field('sin_dec', np.sin(data.mc['dec'])) - data.mc.append_field('sin_true_dec', np.sin(data.mc['true_dec'])) + if(data.mc is not None): + # Append sin(dec) and sin(true_dec) to the MC data. + task = 'Appending IceCube-specific data fields to MC data.' + with TaskTimer(tl, task): + if 'sin_dec' not in data.mc.field_name_list: + data.mc.append_field( + 'sin_dec', np.sin(data.mc['dec'])) + if 'sin_true_dec' not in data.mc.field_name_list: + data.mc.append_field( + 'sin_true_dec', np.sin(data.mc['true_dec'])) + + # Set the livetime of the dataset from the GRL data when no livetime + # was specified previously. + if(data.livetime is None and data.grl is not None): + if('start' not in data.grl): + raise KeyError('The GRL data for dataset "{}" has no data ' + 'field named "start"!'.format(self.name)) + if('stop' not in data.grl): + raise KeyError('The GRL data for dataset "{}" has no data ' + 'field named "stop"!'.format(self.name)) + data.livetime = np.sum(data.grl['stop'] - data.grl['start']) + + # Select only the experimental data which fits the good-run-list for + # this dataset. + if data.grl is not None: + # Select based on run information. + if (('run' in data.grl) and + ('run' in data.exp)): + task = 'Selected only the experimental data that matches the '\ + 'run information in the GRL for dataset "%s".'%(self.name) + with TaskTimer(tl, task): + runs = np.unique(data.grl['run']) + mask = np.isin(data.exp['run'], runs) + data.exp = data.exp[mask] + + # Select based on detector on-time information. + if (('start' in data.grl) and + ('stop' in data.grl) and + ('time' in data.exp)): + task = 'Selected only the experimental data that matches the '\ + 'detector\'s on-time information in the GRL for dataset '\ + '"%s".'%(self.name) + with TaskTimer(tl, task): + mask = np.zeros((len(data.exp),), dtype=np.bool_) + for (start, stop) in zip(data.grl['start'], + data.grl['stop']): + mask |= ( + (data.exp['time'] >= start) & + (data.exp['time'] < stop) + ) + + if np.any(~mask): + n_cut_evts = np.count_nonzero(~mask) + self._logger.info( + f'Cutting {n_cut_evts} events from dataset ' + f'{self.name} due to GRL on-time window ' + 'information.') + data.exp = data.exp[mask] class I3DatasetData(DatasetData): @@ -326,6 +370,17 @@ class I3DatasetData(DatasetData): holds the good-run-list (GRL) data. """ def __init__(self, data, data_grl): + """Constructs a new I3DatasetData instance. + + Parameters + ---------- + data : DatasetData instance + The DatasetData instance holding the experimental and monte-carlo + data. 
+ data_grl : DataFieldRecordArray instance | None + The DataFieldRecordArray instance holding the good-run-list data + of the dataset. This can be None, if no GRL data is available. + """ super(I3DatasetData, self).__init__( data._exp, data._mc, data._livetime) diff --git a/skyllh/i3/detsigyield.py b/skyllh/i3/detsigyield.py index e344153c80..2f1503057c 100644 --- a/skyllh/i3/detsigyield.py +++ b/skyllh/i3/detsigyield.py @@ -432,7 +432,12 @@ def __call__(self, src, src_flux_params): parameter, i.e. gamma, the array is (N_sources,1)-shaped. """ src_dec = np.atleast_1d(src['dec']) - src_gamma = src_flux_params['gamma'] + if src_flux_params is None: + # Gamma is not a fit parameter. So we take it from the + # initial flux model. + src_gamma = np.array([self.fluxmodel.gamma], dtype=np.double) + else: + src_gamma = src_flux_params['gamma'] # Create results array. values = np.zeros_like(src_dec, dtype=np.float64) diff --git a/skyllh/i3/pdf.py b/skyllh/i3/pdf.py index 46e8ba492e..3fe6804a4f 100644 --- a/skyllh/i3/pdf.py +++ b/skyllh/i3/pdf.py @@ -229,9 +229,12 @@ def get_prob(self, tdm, fitparams=None, tl=None): logE_binning = self.get_binning('log_energy') sinDec_binning = self.get_binning('sin_dec') - logE_idx = np.digitize(get_data('log_energy'), logE_binning.binedges) - 1 - sinDec_idx = np.digitize(get_data('sin_dec'), sinDec_binning.binedges) - 1 + logE_idx = np.digitize( + get_data('log_energy'), logE_binning.binedges) - 1 + sinDec_idx = np.digitize( + get_data('sin_dec'), sinDec_binning.binedges) - 1 with TaskTimer(tl, 'Evaluating logE-sinDec histogram.'): prob = self._hist_logE_sinDec[(logE_idx,sinDec_idx)] + return prob diff --git a/skyllh/i3/scrambling.py b/skyllh/i3/scrambling.py index 07a6d794ec..bd0616ee6d 100644 --- a/skyllh/i3/scrambling.py +++ b/skyllh/i3/scrambling.py @@ -1,7 +1,15 @@ # -*- coding: utf-8 -*- -from skyllh.core.scrambling import TimeScramblingMethod -from skyllh.i3.coords import hor_to_equ_transform, azi_to_ra_transform +import numpy as np + +from skyllh.core.scrambling import ( + DataScramblingMethod, + TimeScramblingMethod, +) +from skyllh.i3.coords import ( + azi_to_ra_transform, + hor_to_equ_transform, +) class I3TimeScramblingMethod(TimeScramblingMethod): @@ -46,3 +54,70 @@ def scramble(self, rss, data): data['ra'] = azi_to_ra_transform(data['azi'], mjds) return data + + +class I3SeasonalVariationTimeScramblingMethod(DataScramblingMethod): + """The I3SeasonalVariationTimeScramblingMethod class provides a data + scrambling method to perform data coordinate scrambling based on a generated + time, which follows seasonal variations within the experimental data. + """ + def __init__(self, data, **kwargs): + """Initializes a new seasonal time scrambling instance. + + Parameters + ---------- + data : instance of I3DatasetData + The instance of I3DatasetData holding the experimental data and + good-run-list information. + """ + super().__init__(**kwargs) + + # The run weights are the number of events in each run relative to all + # the events to account for possible seasonal variations. 
+ self.run_weights = np.zeros((len(data.grl),), dtype=np.float64) + n_events = len(data.exp['time']) + for (i, (start, stop)) in enumerate( + zip(data.grl['start'], data.grl['stop'])): + mask = (data.exp['time'] >= start) & (data.exp['time'] < stop) + self.run_weights[i] = len(data.exp[mask]) / n_events + self.run_weights /= np.sum(self.run_weights) + + self.grl = data.grl + + def scramble(self, rss, data): + """Scrambles the given data based on random MJD times, which are + generated uniformly within the data runs, where the runs are weighted + by their number of events relative to the total number of events. + + Parameters + ---------- + rss : instance of RandomStateService + The random state service providing the random number + generator (RNG). + data : instance of DataFieldRecordArray + The DataFieldRecordArray instance containing the data to be + scrambled. + + Returns + ------- + data : instance of DataFieldRecordArray + The given DataFieldRecordArray holding the scrambled data. + """ + # Get run indices based on their seasonal weights. + run_idxs = rss.random.choice( + self.grl['start'].size, + size=len(data['time']), + p=self.run_weights) + + # Draw random times uniformly within the runs. + times = rss.random.uniform( + self.grl['start'][run_idxs], + self.grl['stop'][run_idxs]) + + # Get the correct right ascension. + data['time'] = times + data['ra'] = azi_to_ra_transform( + azi=data['azi'], + mjd=times) + + return data diff --git a/skyllh/physics/flux.py b/skyllh/physics/flux.py index 724f761670..b8b8424f95 100644 --- a/skyllh/physics/flux.py +++ b/skyllh/physics/flux.py @@ -523,6 +523,69 @@ def __call__(self, E): flux = self.Phi0 * np.power(E / self.E0, -self.gamma) return flux + def get_integral(self, E_min, E_max): + """Returns the integral value of the flux over the given energy + range. + + Parameters + ---------- + E_min : float | 1d numpy ndarray of float + The lower energy bound of the integration. + E_max : float | 1d numpy ndarray of float + The upper energy bound of the integration. + + Returns + ------- + integral : float | 1d ndarray of float + The integral value(s) for the given integration range(s). + """ + gamma = self.gamma + + # Handle special case for gamma = 1. + if(gamma == 1): + integral = self.Phi0 * self.E0 * ( + np.log(np.abs(E_max)) - np.log(np.abs(E_min))) + return integral + + integral = (self.Phi0 / ((1.-gamma)*np.power(self.E0, -gamma)) * + (np.power(E_max, 1.-gamma) - np.power(E_min, 1.-gamma))) + + return integral + + def get_inv_normed_cdf(self, x, E_min, E_max): + """Calculates the inverse cumulative distribution function value for + each given value of x, which is a number between 0 and 1. + + Parameters + ---------- + x : float | 1d numpy ndarray of float + The argument value(s) of the inverse cumulative distribution + function. Must be between 0 and 1. + E_min : float + The lower energy edge of the flux to be considered. + E_max : float + The upper energy edge of the flux to be considered. + + Returns + ------- + inv_normed_cdf : float | 1d numpy ndarray + The energy value(s) from the inverse normed cumulative distribution + function. + """ + gamma = self.gamma + + if(gamma == 1): + N_0 = np.log(E_max / E_min) + inv_normed_cdf = E_min * np.exp(x * N_0) + return inv_normed_cdf + + N_0 = E_max ** (1. - gamma) - E_min ** (1. - gamma) + inv_normed_cdf = np.power( + x * N_0 + E_min**(1. - gamma), + (1. / (1. - gamma))) + + return inv_normed_cdf + class CutoffPowerLawFlux(PowerLawFlux): """Cut-off power law flux of the form