{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Working with arbitrary data\n",
    "\n",
    ".. contents:: :local:\n",
    "\n",
    "In this tutorial, we demonstrate working with arbitrary, rather than pre-defined, data."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import copy\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "import histlite as hl\n",
    "import csky as cy\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/mnt/lfs7/user/ssclafani/software/external/csky/csky/plotting.py:92: MatplotlibDeprecationWarning: Support for setting the 'text.latex.preamble' or 'pgf.preamble' rcParam to a list of strings is deprecated since 3.3 and will be removed two minor releases later; set it to a single string instead.\n",
      "  r'\\SetSymbolFont{operators}   {sans}{OT1}{cmss} {m}{n}'\n"
     ]
    }
   ],
   "source": [
    "cy.plotting.mrichman_mpl()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get some real data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "timer = cy.timing.Timer()\n",
    "time = timer.time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Setting up Analysis for:\n",
      "IC86_2011\n",
      "Setting up IC86_2011...\n",
      "Reading /data/ana/analyses/ps_tracks/version-003-p02/IC86_2011_MC.npy ...\n",
      "Reading /data/ana/analyses/ps_tracks/version-003-p02/IC86_2011_exp.npy ...\n",
      "Reading /data/ana/analyses/ps_tracks/version-003-p02/GRL/IC86_2011_exp.npy ...\n",
      "Energy PDF Ratio Model...\n",
      "  * gamma = 4.0000 ...\n",
      "Signal Acceptance Model...\n",
      "  * gamma = 4.0000 ...\n",
      "Done.\n",
      "\n",
      "0:00:14.625254 elapsed.\n"
     ]
    }
   ],
   "source": [
    "with time('ana setup: original'):\n",
    "    ana = cy.get_analysis(cy.selections.Repository(), 'version-003-p02', cy.selections.PSDataSpecs.ps_2011)\n",
    "a = ana[0]\n",
    "data, sig, livetime = a.data, a.sig, a.livetime"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate modified data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can choose your workflow here: modify the data and then define a `DataSpec`, or include the dataset modifications inside the `DataSpec`.  I'll go with the latter, and define a version of IC86 that is magically better in that its angular reconstruction is twice as good.  By inheriting from the original, we reuse anything we don't specifically redefine: in particular, binning settings for PDFs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Setting up Analysis for:\n",
      "IC86_2011_MagicallyBetter\n",
      "Setting up IC86_2011_MagicallyBetter...\n",
      "Reading /data/ana/analyses/ps_tracks/version-003-p02/GRL/IC86_2011_exp.npy ...\n",
      "Energy PDF Ratio Model...\n",
      "  * gamma = 4.0000 ...\n",
      "Signal Acceptance Model...\n",
      "  * gamma = 4.0000 ...\n",
      "Done.\n",
      "\n",
      "0:00:12.286926 elapsed.\n"
     ]
    }
   ],
   "source": [
    "class IC86_2011_MagicallyBetter(cy.selections.PSDataSpecs.IC86_2011):\n",
    "    \"\"\"Better version of IC86-2011\"\"\"\n",
    "    def dataset_modifications(self, ds):\n",
    "        \"\"\"Improve angres by a factor of two.\"\"\"\n",
    "        sig = copy.deepcopy(ds.sig)\n",
    "        data = copy.deepcopy(ds.data)\n",
    "        sig['xra'] = .5 * sig.xra\n",
    "        sig['xdec'] = .5 * sig.xdec\n",
    "        # this particular geometric code has a silly (dec,ra) convention, please forgive\n",
    "        sig['dec'], sig['ra'] = cy.coord.rotate_xaxis_to_source(\n",
    "            sig.true_dec, sig.true_ra, sig.xdec, sig.xra, latlon=True)\n",
    "        sigma_floor = np.radians(0.2)\n",
    "        sig['sigma'] = np.maximum(sigma_floor, .5 * sig.sigma)\n",
    "        data['sigma'] = np.maximum(sigma_floor, .5 * data.sigma)\n",
    "        ds.sig, ds.data = sig, data\n",
    "        \n",
    "    _path_data = data\n",
    "    _path_sig = sig\n",
    "    _livetime = a.livetime\n",
    "    \n",
    "with time('ana setup: modified'):\n",
    "    ana2 = cy.get_analysis(cy.selections.Repository(), 'version-003-p02', IC86_2011_MagicallyBetter)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "src = cy.sources(0, 0)\n",
    "tr = cy.get_trial_runner(src=src, ana=ana, mp_cpus=10)\n",
    "tr2 = cy.get_trial_runner(src=src, ana=ana2, mp_cpus=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Performing 1000 background trials using 10 cores:\n",
      "       1000/1000 trials complete.   \n",
      "\n",
      "0:00:02.381897 elapsed.\n"
     ]
    }
   ],
   "source": [
    "with time('bg 1'):\n",
    "    b = cy.dists.Chi2TSD(tr.get_many_fits(1000, seed=1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "sens_kw = dict(beta=0.9, n_sig_step=1, batch_size=500, tol=.03, seed=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Start time: 2021-07-30 15:13:30.866028\n",
      "Using 10 cores.\n",
      "* Starting initial scan for 90% of 50 trials with TS >= 0.052...\n",
      "  n_sig = 1.000 ... frac = 0.74000\n",
      "  n_sig = 2.000 ... frac = 0.88000\n",
      "  n_sig = 3.000 ... frac = 0.90000\n",
      "* Generating batches of 500 trials...\n",
      "n_trials | n_inj    0.00    1.20    2.40    3.60    4.80    6.00  |  n_sig(relative error)\n",
      "500      |         45.4%   73.6%   85.4%   95.0%   95.0%   98.2%  |      2.895 (+/-  4.3%) [chi2.cdf]\n",
      "1000     |         47.4%   72.7%   85.8%   94.4%   96.1%   98.2%  |      2.928 (+/-  3.3%) [chi2.cdf]\n",
      "1500     |         48.6%   72.6%   84.9%   93.9%   96.0%   98.2%  |      3.009 (+/-  2.4%) [chi2.cdf]\n",
      "End time: 2021-07-30 15:13:54.452319\n",
      "Elapsed time: 0:00:23.586291\n",
      "\n",
      "0:00:23.587761 elapsed.\n"
     ]
    }
   ],
   "source": [
    "with time('sens 1'):\n",
    "    sens = tr.find_n_sig(b.median(), **sens_kw)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Performing 1000 background trials using 10 cores:\n",
      "       1000/1000 trials complete.   \n",
      "\n",
      "0:00:02.341552 elapsed.\n"
     ]
    }
   ],
   "source": [
    "with time('bg 2'):\n",
    "    b2 = cy.dists.Chi2TSD(tr2.get_many_fits(1000, seed=1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Start time: 2021-07-30 15:14:04.353769\n",
      "Using 10 cores.\n",
      "* Starting initial scan for 90% of 50 trials with TS >= 0.022...\n",
      "  n_sig = 1.000 ... frac = 0.80000\n",
      "  n_sig = 2.000 ... frac = 0.90000\n",
      "* Generating batches of 500 trials...\n",
      "n_trials | n_inj    0.00    0.80    1.60    2.40    3.20    4.00  |  n_sig(relative error)\n",
      "500      |         50.2%   67.6%   85.0%   87.8%   94.8%   95.8%  |      2.454 (+/-  4.9%) [chi2.cdf]\n",
      "1000     |         51.1%   69.7%   84.0%   89.6%   94.7%   97.0%  |      2.358 (+/-  3.2%) [chi2.cdf]\n",
      "1500     |         49.3%   70.6%   84.1%   89.3%   94.9%   96.7%  |      2.345 (+/-  2.6%) [chi2.cdf]\n",
      "End time: 2021-07-30 15:14:23.955296\n",
      "Elapsed time: 0:00:19.601527\n",
      "\n",
      "0:00:19.605097 elapsed.\n"
     ]
    }
   ],
   "source": [
    "with time('sens 2'):\n",
    "    sens2 = tr2.find_n_sig(b2.median(), **sens_kw)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Do these compare about right?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1.2831309784417606, 1.4142135623730951)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sens['n_sig'] / sens2['n_sig'], np.sqrt(2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "I guess I buy it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ana setup: original | 0:00:15.504567\n",
      "ana setup: modified | 0:00:14.002988\n",
      "bg 1                | 0:00:02.314820\n",
      "sens 1              | 0:00:21.714302\n",
      "bg 2                | 0:00:02.306961\n",
      "sens 2              | 0:00:50.869679\n",
      "------------------------------------\n",
      "total               | 0:01:46.713317\n"
     ]
    }
   ],
   "source": [
    "print(timer)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}