Source code for csky.seeding

# seeding.py

"""Seeding algorithms for tricky likelihood optimizations.

Time-dependent analyses with fitted flare profiles are notoriously challenging minimizer
problems.  This module provides :class:`UTFSeeder`, which basically works and is
recommended in the tutorial notebooks.  It also provides some other classes that you
should ignore.
"""

from __future__ import print_function

import numpy as np
from scipy import ndimage, stats

from . import pdf

import sys

try:
    xrange
except NameError:
    xrange = range

try:
    from itertools import izip
except:
    izip = zip

[docs] class UTFSeeder(object): """Seeder for box-profile untriggered flare search. This class provides some heuristics for choosing reasonable seeds for Gaussian or box-profile untriggered flare searches. It's absolutely disgusting, but it does appear to basically work - even for multi-dataset analysis. "Inspired" by some code in psLab. """
[docs] def __init__(self, threshold=1000, gammas=np.r_[0], test_gammas=np.r_[1, 2, 3, 4], n_test=np.r_[1,2,3,4,5,10,15,20,25,30,35,40], perc_best=1, reseed=True, refit=None, debug=False, ana_keys=None): self.ana_keys = ana_keys self.threshold = threshold self.gammas, self.test_gammas = gammas, test_gammas self.n_test = n_test self.perc_best = perc_best self.reseed = reseed self.refit = refit self.debug = debug
[docs] def __call__(self, ML, bounds, fixed_params, _masks = None): self._masks = _masks if 't0' in fixed_params: return self.seed_dt(ML, bounds, fixed_params) elif 'dt' in fixed_params: return self.seed_t0(ML, bounds, fixed_params) else: return self.seed_both(ML, bounds)
def select_llhs(self, ML): if self.ana_keys is None: return ML.llhs else: return [L for L in ML.llhs if L.ana_key in self.ana_keys] def gamma_mjd_SB(self, ML): Ls = self.select_llhs(ML) for gamma in self.gammas: ML_thresh_mjd, ML_thresh_SB = [], [] for L in Ls: all_SB = [] all_mjd = [] all_mask = [] thresh_SB = [] thresh_mjd = [] all_evs = [] evs = [] if self._masks is None: _masks = [None for evaluator in L.evaluators] else: _masks = self._masks for (e, _mask) in izip(L.evaluators, _masks): if type(e) == pdf.MultiPDFRatioEvaluator: se = e.evaluators[0].space_evaluator te = e.evaluators[0].time_evaluator ee = e.evaluators[1] elif type(e) == pdf.SpaceTimePDFRatioEvaluator: se = e.space_evaluator te = e.time_evaluator ee = None kept_i_ev = se.get_kept_i_ev() se.finalize_i_ev(kept_i_ev) else: raise NotImplementedError(f"Evaluator type {type(e)} is not supported") self.box = te.model.box self.box_mode = te.model.box_mode or 'center' evs.append(se.final_ev) if gamma > 0: if ee is None: SB = se(gamma=gamma, _mask=_mask)[0] else: Se = ee(gamma=gamma, _mask=_mask)[0] else: if ee is None: SB = np.max([se(gamma=g, _mask=_mask)[0] for g in np.r_[1:4.01:.25] ], axis=0) else: Se = np.max([ee(gamma=g, _mask=_mask)[0] for g in np.r_[1:4.01:.25] ], axis=0) if ee is not None: Ss = se(_mask=_mask)[0] SB = Se * Ss all_SB.append(SB) all_mjd.append(se.final_ev.mjd) all_SB = np.concatenate(all_SB) all_mjd = np.concatenate(all_mjd) mask = all_SB > self.threshold thresh_mjd, thresh_SB = all_mjd[mask], all_SB[mask] ML_thresh_mjd.append(thresh_mjd) ML_thresh_SB.append(thresh_SB) mjd, SB = np.concatenate(ML_thresh_mjd), np.concatenate(ML_thresh_SB) order = np.argsort(mjd) mjd, SB = mjd[order], SB[order] yield gamma, mjd, SB def seed_dt(self, ML, bounds, fixed_params): # TODO: # could internally perform pre-emptive llh check as an optimization # see UTFSeeder.seed_both() # TODO: # implement missing UTFSeeder.seed_t0() for sliding-window fitting seeds = [] t0 = fixed_params['t0'] max_dt = bounds['dt'][1] for (gamma, thresh_mjd, thresh_SB) in self.gamma_mjd_SB(ML): dmjd = t0 - thresh_mjd if self.box_mode == 'center': mask = np.abs(dmjd) <= 0.5 * max_dt elif self.box_mode == 'pre': mask = (0 <= dmjd) & (dmjd <= max_dt) elif self.box_mode == 'post': mask = (-max_dt <= dmjd) & (dmjd <= 0) dmjd, mjd, SB = dmjd[mask], thresh_mjd[mask], thresh_SB[mask] rank = np.argsort(SB) # generate seed dicts for i in rank: dt = np.abs(dmjd[i]) if self.box_mode == 'center': dt *= 2 dt = np.nextafter(dt, dt + 1) if gamma <= 0: for g in self.test_gammas: seeds.append(dict(dt=dt, gamma=g)) else: seeds.append(dict(dt=dt, gamma=gamma)) refit = (not self.box) if self.refit is None else self.refit exclude_after = ['dt'] if refit else ['t0', 'dt'] return seeds, exclude_after def seed_both(self, ML, bounds): # TODO: break this down! Ls = self.select_llhs(ML) self.eval_type = type(Ls[0].evaluators[0]) if self.eval_type == pdf.MultiPDFRatioEvaluator: evss = [ [e.evaluators[0].space_evaluator.final_ev for e in L.evaluators] for L in Ls] elif self.eval_type == pdf.SpaceTimePDFRatioEvaluator: for L in Ls: kept_i_ev = L.evaluators[0].space_evaluator.get_kept_i_ev() L.evaluators[0].space_evaluator.finalize_i_ev(kept_i_ev) evss = [[L.evaluators[0].space_evaluator.final_ev for L in Ls]] else: raise NotImplementedError(f"Evaluator type {type(e)} is not supported") min_dt, max_dt = bounds['dt'] min_t0, max_t0 = bounds['t0'] seeds = [] quals = [] def add_seed(n, t0, dt, gamma): seed = dict(t0=t0, dt=dt, gamma=gamma) g = gamma penalty = 2 * np.log(dt / max_dt) TS = ML.get_ts( min(n, 5), t0=t0, dt=dt, gamma=g, use_grl=self.box, _masks=self._masks) seed['qual'] = qual = penalty + TS seeds.append(seed) quals.append(qual) for (gamma, mjd, SB) in self.gamma_mjd_SB(ML): N = mjd.size I = np.r_[:N] if self.box: i1, i2 = map(np.ravel, np.meshgrid(I, I)) dmjd = mjd[i2] - mjd[i1] mjd1, mjd2 = mjd[i1], mjd[i2] dt_mask = (min_dt <= dmjd) & (dmjd <= max_dt) i1s, i2s = np.ravel(i1[dt_mask]), np.ravel(i2[dt_mask]) for (i1, i2) in zip(i1s, i2s): t1, t2 = mjd[i1], mjd[i2] dt = t2 - t1 if self.box_mode == 'center': t0 = t1 + 0.5 * dt elif self.box_mode == 'post': t0 = t1 elif self.box_mode == 'pre': t0 = t2 if gamma <= 0: for g in self.test_gammas: add_seed(2, t0, dt, g) else: add_seed(2, t0, dt, gamma) else: for n in self.n_test: if n > N: break i1s, i2s = I[:-n], I[n:] for (i1, i2) in zip(i1s, i2s): mjd_i1i2 = mjd[i1:i2] SB_i1i2 = SB[i1:i2] t0 = np.mean(mjd_i1i2) dt_i1i2 = mjd_i1i2 - t0 if n >= 2: dt = 3 * np.sqrt(np.sum(dt_i1i2**2)) else: dt = min_dt if dt > max_dt: continue if gamma <= 0: for g in self.test_gammas: add_seed(n, t0, dt, g) else: add_seed(n, t0, dt, gamma) if not seeds: print("Warning:") print('No events passed S/B threshold for seeding flares (threshold = %s). Try lowering the threshold value when initializing the seeder'%(self.threshold)) print() return [dict(t0=.5 * (min_t0 + max_t0), dt=.5 * (min_dt + max_dt), gamma=2)], [] quals, seeds = np.array(quals), np.array(seeds) qual_min = np.percentile(quals, 100 - self.perc_best) seeds = list(seeds[quals >= qual_min]) if self.reseed: seeds = [ dict(t0=seed['t0'], dt=seed['dt'], gamma=g, qual=seed['qual']) for seed in seeds for g in np.r_[1:4.01:.25] ] if self.debug: print('\n'.join(map(str, sorted(seeds, key=lambda s: s[self.debug])))) sys.stdout.flush() refit = (not self.box) if self.refit is None else self.refit exclude_after = [] if refit else ['t0', 'dt'] return seeds, exclude_after
[docs] class GaussianUTFSeeder(object): """Seeder for Gaussian-profile untriggered flare search. NOTE: you probably want :class:`UTFSeeder`. """
[docs] def __init__(self, n_flare=np.r_[1:11, 15, 25, 50], remove_duplicates=True, threshold=0.5, ): self.n_flare = n_flare self.remove_duplicates = remove_duplicates self.threshold = threshold
[docs] def __call__(self, ML, bounds, fixed_params): # TODO: generalize for stacking or warn if stacking attempted # TODO: generalize for unanticipated params(e.g. ra,dec fitting) from . import llh Ls = self.select_llhs(ML) if isinstance(ML, llh.MultiLLHEvaluator) else [ML] seeds = [] qualitys = [] dt_min, dt_max = bounds['dt'] #for gamma in np.r_[-2, 1.5:3.51:.25]: all_gammas = np.r_[-2, -3, 1.25:3.76:.5] all_gammas = -np.r_[1:4.01:.25] all_gammas = np.r_[-1, 1:4.01:.5] for L in Ls: Se_best = {e: np.max([e.evaluators[1](gamma=g)[0] for g in np.r_[1:4.01:.25] ], axis=0) for e in L.evaluators} for gamma in all_gammas: seed_gammas = np.r_[1:4.01:.25] if gamma < 0 else [gamma] all_SsSe = [] all_mjd = [] for e in L.evaluators: se = e.evaluators[0].space_evaluator te = e.evaluators[0].time_evaluator box = te.model.box box_mode = te.model.box_mode ee = e.evaluators[1] Ss = se()[0] if gamma > 0: Se = ee(gamma=gamma)[0] else: Se = Se_best[e] gamma = -gamma SsSe = Ss * Se all_SsSe.append(SsSe) all_mjd.append(se.final_ev.mjd) all_SsSe = np.concatenate(all_SsSe) all_mjd = np.concatenate(all_mjd) order = np.argsort(all_mjd) all_SsSe, all_mjd = all_SsSe[order], all_mjd[order] t0s, dts = [], [] mask = all_SsSe > self.threshold mask_mjd = all_mjd[mask] mask_SsSe = all_SsSe[mask] for n_flare in self.n_flare: mjd = mask_mjd.copy() SsSe = mask_SsSe.copy() if n_flare == 1: for i_best in np.argsort(SsSe)[-5:]: for dt in np.r_[dt_max * .125, dt_max * .25, dt_max * .5]: for g in seed_gammas: seed = dict(gamma=g, t0=mjd[i_best], dt=dt) seeds.append(seed) qualitys.append(np.inf) continue #n_best = max(3, int(np.ceil(15. / n_flare))) n_best = int(np.ceil(15. / (n_flare - 1))) for i_best in xrange(n_best): if not mjd.size >= n_flare: continue SsSe_cumsum = np.r_[0, np.cumsum(np.log10 (SsSe))] i = n_flare - 1 quality = (SsSe_cumsum[i:] - SsSe_cumsum[:-i]) #/ (mjd[i:] - mjd[:-i]) dmjd = np.r_[mjd[i:] - mjd[:-i], dt_min] quality[dmjd > 2 * dt_max] = 0 i_start = np.argmax(quality) i_end = i_start + n_flare mjd_flare = mjd[i_start:i_end] if box: t0 = mjd_flare[0] dt = mjd_flare[-1] - t0 if box_mode == 'center': t0 += 0.5 * dt elif box_mode == 'pre': t0 -= dt else: t0 = np.mean(mjd_flare) dt = np.sqrt(np.sum((mjd_flare - t0)**2)) #dt = np.diff(mjd_flare[[0,-1]]) * 1.4 t0 = np.clip(t0, *bounds['t0']) dt = np.clip(dt, *bounds['dt']) for g in seed_gammas: seed = dict(gamma=g, t0=t0, dt=dt) seeds.append(seed) qualitys.append(quality[i_start]) if self.remove_duplicates: mjd = np.r_[mjd[:i_start], mjd[i_end-1:]] SsSe = np.r_[SsSe[:i_start], SsSe[i_end-1:]] #seeds = np.array(seeds) #qualitys = np.array(qualitys) #seeds = list(seeds[np.argsort(qualitys[-len(qualitys) // 2:])]) #print('\n'.join(map(str, seeds))) #print(len(seeds)) return seeds
[docs] class BoxUTFSeeder(object): """Seeder for box-profile untriggered flare search. NOTE: you probably want :class:`UTFSeeder`. """
[docs] def __init__(self, n_test=100, threshold=1, gammas=np.r_[1:4.01:.5], weight_boundary_only=True, re_fit=False, debug=False): self.n_test = n_test self.threshold = threshold self.gammas = gammas self.weight_boundary_only = weight_boundary_only self.re_fit = re_fit self.debug = debug
[docs] def __call__(self, ML, bounds, fixed_params): if 't0' in fixed_params: return self.seed_dt(ML, bounds, fixed_params) elif 'dt' in fixed_params: return self.seed_t0(ML, bounds, fixed_params) else: return self.seed_both(ML, bounds)
def gamma_mjd_SB(self, ML): Ls = self.select_llhs(ML) for gamma in self.gammas: ML_thresh_mjd, ML_thresh_SB = [], [] for L in Ls: all_SB = [] all_mjd = [] thresh_SB = [] thresh_mjd = [] all_evs = [] evs = [] for e in L.evaluators: se = e.evaluators[0].space_evaluator te = e.evaluators[0].time_evaluator self.box_mode = te.model.box_mode or 'center' ee = e.evaluators[1] evs.append(se.final_ev) Ss = se()[0] if gamma > 0: Se = ee(gamma=gamma)[0] else: Se = np.max([ee(gamma=g)[0] for g in np.r_[1:4.01:.25] ], axis=0) SB = Ss * Se all_SB.append(SB) all_mjd.append(se.final_ev.mjd) all_SB = np.concatenate(all_SB) all_mjd = np.concatenate(all_mjd) mask = all_SB > self.threshold thresh_mjd, thresh_SB = all_mjd[mask], all_SB[mask] ML_thresh_mjd.append(thresh_mjd) ML_thresh_SB.append(thresh_SB) mjd, SB = np.concatenate(ML_thresh_mjd), np.concatenate(ML_thresh_SB) order = np.argsort(mjd) mjd, SB = mjd[order], SB[order] yield gamma, mjd, SB def seed_dt(self, ML, bounds, fixed_params): seeds = [] t0 = fixed_params['t0'] max_dt = bounds['dt'][1] for (gamma, thresh_mjd, thresh_SB) in self.gamma_mjd_SB(ML): dmjd = t0 - thresh_mjd if self.box_mode == 'center': mask = np.abs(dmjd) <= 0.5 * max_dt elif self.box_mode == 'pre': mask = (0 <= dmjd) & (dmjd <= max_dt) elif self.box_mode == 'post': mask = (-max_dt <= dmjd) & (dmjd <= 0) dmjd, mjd, SB = dmjd[mask], thresh_mjd[mask], thresh_SB[mask] rank = np.argsort(SB) idx = rank[-self.n_test:] # generate seed dicts for i in idx: dt = np.abs(dmjd[i]) if self.box_mode == 'center': dt *= 2 dt = np.nextafter(dt, dt + 1) seed = dict(dt=dt, gamma=np.abs(gamma)) seeds.append(seed) #print('\n'.join(map(str, sorted(seeds, key=lambda s: s['dt'])))) return seeds, 'dt'.split() def seed_both(self, ML, bounds): # TODO: break this down! Ls = self.select_llhs(ML) min_dt, max_dt = bounds['dt'] seeds = [] quals = [] for (gamma, thresh_mjd, thresh_SB) in self.gamma_mjd_SB(ML): # inspect pairs N = thresh_mjd.size r = np.r_[:N] i1, i2 = map(np.ravel, np.meshgrid(r, r)) mjd1, mjd2 = map(np.ravel, np.meshgrid(thresh_mjd, thresh_mjd)) SB1, SB2 = map(np.ravel, np.meshgrid(thresh_SB, thresh_SB)) dmjd = mjd2 - mjd1 mask = (min_dt <= dmjd) & (dmjd <= max_dt) mjd1, mjd2, dmjd, SB1, SB2 = mjd1[mask], mjd2[mask], dmjd[mask], SB1[mask], SB2[mask] i1, i2 = i1[mask], i2[mask] #if self.weight_boundary_only: if gamma < 0: SBSB = np.log(SB1) + np.log(SB2) + np.log(1e-6/86400. + dmjd) else: logSB_cumsum = np.r_[0, np.cumsum(np.log(thresh_SB))] di = i2 - i1 SBSB = ( (logSB_cumsum[i2+1] - logSB_cumsum[i1]) + (1 - di) * np.log(1e-6/86400. + dmjd) ) #mask = (0 < dmjd) & (dmjd <= max_dt) #mjd1, mjd2, dmjd, SBSB = mjd1[mask], mjd2[mask], dmjd[mask], SBSB[mask] rank = np.argsort(SBSB) idx = rank[-self.n_test:] #idx = rank #print(len(idx)) # generate seed dicts for i in idx: #print(gamma, px[i], dt, py[i]) dt = dmjd[i] if self.box_mode == 'center': t0 = mjd1[i] + 0.5 * dt elif self.box_mode == 'post': t0 = mjd1[i] elif self.box_mode == 'pre': t0 = mjd2[i] if gamma < 0: for this_gamma in np.r_[1:4.01:.25]: seed = dict(t0=t0, dt=dt, gamma=this_gamma) seeds.append(seed) else: seed = dict(t0=t0, dt=dt, gamma=gamma, qual=SBSB[i]) seeds.append(seed) quals.append(SBSB[i]) #quals, seeds = np.array(quals), np.array(seeds) #qual_best = np.max(quals) #qual_ok = qual_best * self.qual_threshold #seeds = list(seeds[quals >= qual_ok]) if self.debug: print('\n'.join(map(str, sorted(seeds, key=lambda s: s[self.debug])))) exclude_after = [] if self.re_fit else 't0 dt'.split() return seeds, exclude_after
class WorseGaussianUTFSeeder (object): # TODO: is this one really worse? # -- or did it just work very poorly when subject to dt/logdt confusion? def __init__(self, n_peak=10, n_dt=10): self.n_peak = n_peak self.n_dt = n_dt def __call__(self, ML, bounds, fixed_params): from . import llh def get_peaks(x, y): i = np.where((np.diff(y[:-1]) > 0) & (np.diff(y[1:]) < 0))[0] return x[i+1], y[i+1] Ls = self.select_llhs(ML) if isinstance(ML, llh.MultiLLHEvaluator) else [ML] dt_min, dt_max = bounds['dt'] all_dts = np.linspace(dt_min, dt_max, self.n_dt + 1)[1:] all_gammas = np.r_[1.25:3.76:.5] seeds = [] for gamma in all_gammas: all_SB = [] all_mjd = [] for L in Ls: for e in L.evaluators: se = e.evaluators[0].space_evaluator ee = e.evaluators[1] Ss = se()[0] Se = ee(gamma=gamma)[0] SB = Ss * Se all_SB.append(SB) all_mjd.append(se.final_ev.mjd) SB = np.concatenate(all_SB) mjd = np.concatenate(all_mjd) order = np.argsort(mjd) SB, mjd = SB[order], mjd[order] mask = SB > 0 SB, mjd = SB[mask], mjd[mask] x0 = mjd y0 = SB.cumsum() - SB.sum()/np.diff(mjd[[0,-1]]) * (mjd - mjd[0]) y0 = np.diff(y0) duration = np.diff(x0[[0,-1]]) bins_per_day = 100 x = np.linspace(x0.min(), x0.max(), bins_per_day*int(duration)) y1 = np.interp(x, x0[:-1], y0) for dt in all_dts: y = ndimage.gaussian_filter(y1, bins_per_day * dt) px, py = get_peaks(x, y) order2 = np.argsort(py) for i in order2[-self.n_peak:]: #print(gamma, px[i], dt, py[i]) seed = dict(t0=px[i], dt=dt, gamma=gamma) seeds.append(seed) #print('\n'.join(map(str, seeds))) return seeds, []