# seeding.py
"""Seeding algorithms for tricky likelihood optimizations.
Time-dependent analyses with fitted flare profiles are notoriously challenging minimizer
problems. This module provides :class:`UTFSeeder`, which basically works and is
recommended in the tutorial notebooks. It also provides some other classes that you
should ignore.
"""
from __future__ import print_function
import numpy as np
from scipy import ndimage, stats
from . import pdf
import sys
try:
xrange
except NameError:
xrange = range
try:
from itertools import izip
except:
izip = zip
[docs]
class UTFSeeder(object):
"""Seeder for box-profile untriggered flare search.
This class provides some heuristics for choosing reasonable seeds for Gaussian or
box-profile untriggered flare searches. It's absolutely disgusting, but it does
appear to basically work - even for multi-dataset analysis. "Inspired" by some code
in psLab.
"""
[docs]
def __init__(self,
threshold=1000,
gammas=np.r_[0],
test_gammas=np.r_[1, 2, 3, 4],
n_test=np.r_[1,2,3,4,5,10,15,20,25,30,35,40],
perc_best=1,
reseed=True,
refit=None,
debug=False,
ana_keys=None):
self.ana_keys = ana_keys
self.threshold = threshold
self.gammas, self.test_gammas = gammas, test_gammas
self.n_test = n_test
self.perc_best = perc_best
self.reseed = reseed
self.refit = refit
self.debug = debug
[docs]
def __call__(self, ML, bounds, fixed_params, _masks = None):
self._masks = _masks
if 't0' in fixed_params:
return self.seed_dt(ML, bounds, fixed_params)
elif 'dt' in fixed_params:
return self.seed_t0(ML, bounds, fixed_params)
else:
return self.seed_both(ML, bounds)
def select_llhs(self, ML):
if self.ana_keys is None:
return ML.llhs
else:
return [L for L in ML.llhs if L.ana_key in self.ana_keys]
def gamma_mjd_SB(self, ML):
Ls = self.select_llhs(ML)
for gamma in self.gammas:
ML_thresh_mjd, ML_thresh_SB = [], []
for L in Ls:
all_SB = []
all_mjd = []
all_mask = []
thresh_SB = []
thresh_mjd = []
all_evs = []
evs = []
if self._masks is None:
_masks = [None for evaluator in L.evaluators]
else:
_masks = self._masks
for (e, _mask) in izip(L.evaluators, _masks):
if type(e) == pdf.MultiPDFRatioEvaluator:
se = e.evaluators[0].space_evaluator
te = e.evaluators[0].time_evaluator
ee = e.evaluators[1]
elif type(e) == pdf.SpaceTimePDFRatioEvaluator:
se = e.space_evaluator
te = e.time_evaluator
ee = None
kept_i_ev = se.get_kept_i_ev()
se.finalize_i_ev(kept_i_ev)
else:
raise NotImplementedError(f"Evaluator type {type(e)} is not supported")
self.box = te.model.box
self.box_mode = te.model.box_mode or 'center'
evs.append(se.final_ev)
if gamma > 0:
if ee is None:
SB = se(gamma=gamma, _mask=_mask)[0]
else:
Se = ee(gamma=gamma, _mask=_mask)[0]
else:
if ee is None:
SB = np.max([se(gamma=g, _mask=_mask)[0] for g in np.r_[1:4.01:.25] ], axis=0)
else:
Se = np.max([ee(gamma=g, _mask=_mask)[0] for g in np.r_[1:4.01:.25] ], axis=0)
if ee is not None:
Ss = se(_mask=_mask)[0]
SB = Se * Ss
all_SB.append(SB)
all_mjd.append(se.final_ev.mjd)
all_SB = np.concatenate(all_SB)
all_mjd = np.concatenate(all_mjd)
mask = all_SB > self.threshold
thresh_mjd, thresh_SB = all_mjd[mask], all_SB[mask]
ML_thresh_mjd.append(thresh_mjd)
ML_thresh_SB.append(thresh_SB)
mjd, SB = np.concatenate(ML_thresh_mjd), np.concatenate(ML_thresh_SB)
order = np.argsort(mjd)
mjd, SB = mjd[order], SB[order]
yield gamma, mjd, SB
def seed_dt(self, ML, bounds, fixed_params):
# TODO:
# could internally perform pre-emptive llh check as an optimization
# see UTFSeeder.seed_both()
# TODO:
# implement missing UTFSeeder.seed_t0() for sliding-window fitting
seeds = []
t0 = fixed_params['t0']
max_dt = bounds['dt'][1]
for (gamma, thresh_mjd, thresh_SB) in self.gamma_mjd_SB(ML):
dmjd = t0 - thresh_mjd
if self.box_mode == 'center':
mask = np.abs(dmjd) <= 0.5 * max_dt
elif self.box_mode == 'pre':
mask = (0 <= dmjd) & (dmjd <= max_dt)
elif self.box_mode == 'post':
mask = (-max_dt <= dmjd) & (dmjd <= 0)
dmjd, mjd, SB = dmjd[mask], thresh_mjd[mask], thresh_SB[mask]
rank = np.argsort(SB)
# generate seed dicts
for i in rank:
dt = np.abs(dmjd[i])
if self.box_mode == 'center':
dt *= 2
dt = np.nextafter(dt, dt + 1)
if gamma <= 0:
for g in self.test_gammas:
seeds.append(dict(dt=dt, gamma=g))
else:
seeds.append(dict(dt=dt, gamma=gamma))
refit = (not self.box) if self.refit is None else self.refit
exclude_after = ['dt'] if refit else ['t0', 'dt']
return seeds, exclude_after
def seed_both(self, ML, bounds):
# TODO: break this down!
Ls = self.select_llhs(ML)
self.eval_type = type(Ls[0].evaluators[0])
if self.eval_type == pdf.MultiPDFRatioEvaluator:
evss = [
[e.evaluators[0].space_evaluator.final_ev for e in L.evaluators]
for L in Ls]
elif self.eval_type == pdf.SpaceTimePDFRatioEvaluator:
for L in Ls:
kept_i_ev = L.evaluators[0].space_evaluator.get_kept_i_ev()
L.evaluators[0].space_evaluator.finalize_i_ev(kept_i_ev)
evss = [[L.evaluators[0].space_evaluator.final_ev for L in Ls]]
else:
raise NotImplementedError(f"Evaluator type {type(e)} is not supported")
min_dt, max_dt = bounds['dt']
min_t0, max_t0 = bounds['t0']
seeds = []
quals = []
def add_seed(n, t0, dt, gamma):
seed = dict(t0=t0, dt=dt, gamma=gamma)
g = gamma
penalty = 2 * np.log(dt / max_dt)
TS = ML.get_ts(
min(n, 5), t0=t0, dt=dt, gamma=g, use_grl=self.box, _masks=self._masks)
seed['qual'] = qual = penalty + TS
seeds.append(seed)
quals.append(qual)
for (gamma, mjd, SB) in self.gamma_mjd_SB(ML):
N = mjd.size
I = np.r_[:N]
if self.box:
i1, i2 = map(np.ravel, np.meshgrid(I, I))
dmjd = mjd[i2] - mjd[i1]
mjd1, mjd2 = mjd[i1], mjd[i2]
dt_mask = (min_dt <= dmjd) & (dmjd <= max_dt)
i1s, i2s = np.ravel(i1[dt_mask]), np.ravel(i2[dt_mask])
for (i1, i2) in zip(i1s, i2s):
t1, t2 = mjd[i1], mjd[i2]
dt = t2 - t1
if self.box_mode == 'center':
t0 = t1 + 0.5 * dt
elif self.box_mode == 'post':
t0 = t1
elif self.box_mode == 'pre':
t0 = t2
if gamma <= 0:
for g in self.test_gammas:
add_seed(2, t0, dt, g)
else:
add_seed(2, t0, dt, gamma)
else:
for n in self.n_test:
if n > N:
break
i1s, i2s = I[:-n], I[n:]
for (i1, i2) in zip(i1s, i2s):
mjd_i1i2 = mjd[i1:i2]
SB_i1i2 = SB[i1:i2]
t0 = np.mean(mjd_i1i2)
dt_i1i2 = mjd_i1i2 - t0
if n >= 2:
dt = 3 * np.sqrt(np.sum(dt_i1i2**2))
else:
dt = min_dt
if dt > max_dt:
continue
if gamma <= 0:
for g in self.test_gammas:
add_seed(n, t0, dt, g)
else:
add_seed(n, t0, dt, gamma)
if not seeds:
print("Warning:")
print('No events passed S/B threshold for seeding flares (threshold = %s). Try lowering the threshold value when initializing the seeder'%(self.threshold))
print()
return [dict(t0=.5 * (min_t0 + max_t0), dt=.5 * (min_dt + max_dt), gamma=2)], []
quals, seeds = np.array(quals), np.array(seeds)
qual_min = np.percentile(quals, 100 - self.perc_best)
seeds = list(seeds[quals >= qual_min])
if self.reseed:
seeds = [
dict(t0=seed['t0'], dt=seed['dt'], gamma=g, qual=seed['qual'])
for seed in seeds
for g in np.r_[1:4.01:.25]
]
if self.debug:
print('\n'.join(map(str, sorted(seeds, key=lambda s: s[self.debug]))))
sys.stdout.flush()
refit = (not self.box) if self.refit is None else self.refit
exclude_after = [] if refit else ['t0', 'dt']
return seeds, exclude_after
[docs]
class GaussianUTFSeeder(object):
"""Seeder for Gaussian-profile untriggered flare search.
NOTE: you probably want :class:`UTFSeeder`.
"""
[docs]
def __init__(self, n_flare=np.r_[1:11, 15, 25, 50],
remove_duplicates=True, threshold=0.5, ):
self.n_flare = n_flare
self.remove_duplicates = remove_duplicates
self.threshold = threshold
[docs]
def __call__(self, ML, bounds, fixed_params):
# TODO: generalize for stacking or warn if stacking attempted
# TODO: generalize for unanticipated params(e.g. ra,dec fitting)
from . import llh
Ls = self.select_llhs(ML) if isinstance(ML, llh.MultiLLHEvaluator) else [ML]
seeds = []
qualitys = []
dt_min, dt_max = bounds['dt']
#for gamma in np.r_[-2, 1.5:3.51:.25]:
all_gammas = np.r_[-2, -3, 1.25:3.76:.5]
all_gammas = -np.r_[1:4.01:.25]
all_gammas = np.r_[-1, 1:4.01:.5]
for L in Ls:
Se_best = {e: np.max([e.evaluators[1](gamma=g)[0] for g in np.r_[1:4.01:.25] ], axis=0)
for e in L.evaluators}
for gamma in all_gammas:
seed_gammas = np.r_[1:4.01:.25] if gamma < 0 else [gamma]
all_SsSe = []
all_mjd = []
for e in L.evaluators:
se = e.evaluators[0].space_evaluator
te = e.evaluators[0].time_evaluator
box = te.model.box
box_mode = te.model.box_mode
ee = e.evaluators[1]
Ss = se()[0]
if gamma > 0:
Se = ee(gamma=gamma)[0]
else:
Se = Se_best[e]
gamma = -gamma
SsSe = Ss * Se
all_SsSe.append(SsSe)
all_mjd.append(se.final_ev.mjd)
all_SsSe = np.concatenate(all_SsSe)
all_mjd = np.concatenate(all_mjd)
order = np.argsort(all_mjd)
all_SsSe, all_mjd = all_SsSe[order], all_mjd[order]
t0s, dts = [], []
mask = all_SsSe > self.threshold
mask_mjd = all_mjd[mask]
mask_SsSe = all_SsSe[mask]
for n_flare in self.n_flare:
mjd = mask_mjd.copy()
SsSe = mask_SsSe.copy()
if n_flare == 1:
for i_best in np.argsort(SsSe)[-5:]:
for dt in np.r_[dt_max * .125, dt_max * .25, dt_max * .5]:
for g in seed_gammas:
seed = dict(gamma=g, t0=mjd[i_best], dt=dt)
seeds.append(seed)
qualitys.append(np.inf)
continue
#n_best = max(3, int(np.ceil(15. / n_flare)))
n_best = int(np.ceil(15. / (n_flare - 1)))
for i_best in xrange(n_best):
if not mjd.size >= n_flare:
continue
SsSe_cumsum = np.r_[0, np.cumsum(np.log10 (SsSe))]
i = n_flare - 1
quality = (SsSe_cumsum[i:] - SsSe_cumsum[:-i]) #/ (mjd[i:] - mjd[:-i])
dmjd = np.r_[mjd[i:] - mjd[:-i], dt_min]
quality[dmjd > 2 * dt_max] = 0
i_start = np.argmax(quality)
i_end = i_start + n_flare
mjd_flare = mjd[i_start:i_end]
if box:
t0 = mjd_flare[0]
dt = mjd_flare[-1] - t0
if box_mode == 'center':
t0 += 0.5 * dt
elif box_mode == 'pre':
t0 -= dt
else:
t0 = np.mean(mjd_flare)
dt = np.sqrt(np.sum((mjd_flare - t0)**2))
#dt = np.diff(mjd_flare[[0,-1]]) * 1.4
t0 = np.clip(t0, *bounds['t0'])
dt = np.clip(dt, *bounds['dt'])
for g in seed_gammas:
seed = dict(gamma=g, t0=t0, dt=dt)
seeds.append(seed)
qualitys.append(quality[i_start])
if self.remove_duplicates:
mjd = np.r_[mjd[:i_start], mjd[i_end-1:]]
SsSe = np.r_[SsSe[:i_start], SsSe[i_end-1:]]
#seeds = np.array(seeds)
#qualitys = np.array(qualitys)
#seeds = list(seeds[np.argsort(qualitys[-len(qualitys) // 2:])])
#print('\n'.join(map(str, seeds)))
#print(len(seeds))
return seeds
[docs]
class BoxUTFSeeder(object):
"""Seeder for box-profile untriggered flare search.
NOTE: you probably want :class:`UTFSeeder`.
"""
[docs]
def __init__(self, n_test=100, threshold=1, gammas=np.r_[1:4.01:.5],
weight_boundary_only=True,
re_fit=False,
debug=False):
self.n_test = n_test
self.threshold = threshold
self.gammas = gammas
self.weight_boundary_only = weight_boundary_only
self.re_fit = re_fit
self.debug = debug
[docs]
def __call__(self, ML, bounds, fixed_params):
if 't0' in fixed_params:
return self.seed_dt(ML, bounds, fixed_params)
elif 'dt' in fixed_params:
return self.seed_t0(ML, bounds, fixed_params)
else:
return self.seed_both(ML, bounds)
def gamma_mjd_SB(self, ML):
Ls = self.select_llhs(ML)
for gamma in self.gammas:
ML_thresh_mjd, ML_thresh_SB = [], []
for L in Ls:
all_SB = []
all_mjd = []
thresh_SB = []
thresh_mjd = []
all_evs = []
evs = []
for e in L.evaluators:
se = e.evaluators[0].space_evaluator
te = e.evaluators[0].time_evaluator
self.box_mode = te.model.box_mode or 'center'
ee = e.evaluators[1]
evs.append(se.final_ev)
Ss = se()[0]
if gamma > 0:
Se = ee(gamma=gamma)[0]
else:
Se = np.max([ee(gamma=g)[0] for g in np.r_[1:4.01:.25] ], axis=0)
SB = Ss * Se
all_SB.append(SB)
all_mjd.append(se.final_ev.mjd)
all_SB = np.concatenate(all_SB)
all_mjd = np.concatenate(all_mjd)
mask = all_SB > self.threshold
thresh_mjd, thresh_SB = all_mjd[mask], all_SB[mask]
ML_thresh_mjd.append(thresh_mjd)
ML_thresh_SB.append(thresh_SB)
mjd, SB = np.concatenate(ML_thresh_mjd), np.concatenate(ML_thresh_SB)
order = np.argsort(mjd)
mjd, SB = mjd[order], SB[order]
yield gamma, mjd, SB
def seed_dt(self, ML, bounds, fixed_params):
seeds = []
t0 = fixed_params['t0']
max_dt = bounds['dt'][1]
for (gamma, thresh_mjd, thresh_SB) in self.gamma_mjd_SB(ML):
dmjd = t0 - thresh_mjd
if self.box_mode == 'center':
mask = np.abs(dmjd) <= 0.5 * max_dt
elif self.box_mode == 'pre':
mask = (0 <= dmjd) & (dmjd <= max_dt)
elif self.box_mode == 'post':
mask = (-max_dt <= dmjd) & (dmjd <= 0)
dmjd, mjd, SB = dmjd[mask], thresh_mjd[mask], thresh_SB[mask]
rank = np.argsort(SB)
idx = rank[-self.n_test:]
# generate seed dicts
for i in idx:
dt = np.abs(dmjd[i])
if self.box_mode == 'center':
dt *= 2
dt = np.nextafter(dt, dt + 1)
seed = dict(dt=dt, gamma=np.abs(gamma))
seeds.append(seed)
#print('\n'.join(map(str, sorted(seeds, key=lambda s: s['dt']))))
return seeds, 'dt'.split()
def seed_both(self, ML, bounds):
# TODO: break this down!
Ls = self.select_llhs(ML)
min_dt, max_dt = bounds['dt']
seeds = []
quals = []
for (gamma, thresh_mjd, thresh_SB) in self.gamma_mjd_SB(ML):
# inspect pairs
N = thresh_mjd.size
r = np.r_[:N]
i1, i2 = map(np.ravel, np.meshgrid(r, r))
mjd1, mjd2 = map(np.ravel, np.meshgrid(thresh_mjd, thresh_mjd))
SB1, SB2 = map(np.ravel, np.meshgrid(thresh_SB, thresh_SB))
dmjd = mjd2 - mjd1
mask = (min_dt <= dmjd) & (dmjd <= max_dt)
mjd1, mjd2, dmjd, SB1, SB2 = mjd1[mask], mjd2[mask], dmjd[mask], SB1[mask], SB2[mask]
i1, i2 = i1[mask], i2[mask]
#if self.weight_boundary_only:
if gamma < 0:
SBSB = np.log(SB1) + np.log(SB2) + np.log(1e-6/86400. + dmjd)
else:
logSB_cumsum = np.r_[0, np.cumsum(np.log(thresh_SB))]
di = i2 - i1
SBSB = (
(logSB_cumsum[i2+1] - logSB_cumsum[i1])
+ (1 - di) * np.log(1e-6/86400. + dmjd)
)
#mask = (0 < dmjd) & (dmjd <= max_dt)
#mjd1, mjd2, dmjd, SBSB = mjd1[mask], mjd2[mask], dmjd[mask], SBSB[mask]
rank = np.argsort(SBSB)
idx = rank[-self.n_test:]
#idx = rank
#print(len(idx))
# generate seed dicts
for i in idx:
#print(gamma, px[i], dt, py[i])
dt = dmjd[i]
if self.box_mode == 'center':
t0 = mjd1[i] + 0.5 * dt
elif self.box_mode == 'post':
t0 = mjd1[i]
elif self.box_mode == 'pre':
t0 = mjd2[i]
if gamma < 0:
for this_gamma in np.r_[1:4.01:.25]:
seed = dict(t0=t0, dt=dt, gamma=this_gamma)
seeds.append(seed)
else:
seed = dict(t0=t0, dt=dt, gamma=gamma, qual=SBSB[i])
seeds.append(seed)
quals.append(SBSB[i])
#quals, seeds = np.array(quals), np.array(seeds)
#qual_best = np.max(quals)
#qual_ok = qual_best * self.qual_threshold
#seeds = list(seeds[quals >= qual_ok])
if self.debug:
print('\n'.join(map(str, sorted(seeds, key=lambda s: s[self.debug]))))
exclude_after = [] if self.re_fit else 't0 dt'.split()
return seeds, exclude_after
class WorseGaussianUTFSeeder (object):
# TODO: is this one really worse?
# -- or did it just work very poorly when subject to dt/logdt confusion?
def __init__(self, n_peak=10, n_dt=10):
self.n_peak = n_peak
self.n_dt = n_dt
def __call__(self, ML, bounds, fixed_params):
from . import llh
def get_peaks(x, y):
i = np.where((np.diff(y[:-1]) > 0) & (np.diff(y[1:]) < 0))[0]
return x[i+1], y[i+1]
Ls = self.select_llhs(ML) if isinstance(ML, llh.MultiLLHEvaluator) else [ML]
dt_min, dt_max = bounds['dt']
all_dts = np.linspace(dt_min, dt_max, self.n_dt + 1)[1:]
all_gammas = np.r_[1.25:3.76:.5]
seeds = []
for gamma in all_gammas:
all_SB = []
all_mjd = []
for L in Ls:
for e in L.evaluators:
se = e.evaluators[0].space_evaluator
ee = e.evaluators[1]
Ss = se()[0]
Se = ee(gamma=gamma)[0]
SB = Ss * Se
all_SB.append(SB)
all_mjd.append(se.final_ev.mjd)
SB = np.concatenate(all_SB)
mjd = np.concatenate(all_mjd)
order = np.argsort(mjd)
SB, mjd = SB[order], mjd[order]
mask = SB > 0
SB, mjd = SB[mask], mjd[mask]
x0 = mjd
y0 = SB.cumsum() - SB.sum()/np.diff(mjd[[0,-1]]) * (mjd - mjd[0])
y0 = np.diff(y0)
duration = np.diff(x0[[0,-1]])
bins_per_day = 100
x = np.linspace(x0.min(), x0.max(), bins_per_day*int(duration))
y1 = np.interp(x, x0[:-1], y0)
for dt in all_dts:
y = ndimage.gaussian_filter(y1, bins_per_day * dt)
px, py = get_peaks(x, y)
order2 = np.argsort(py)
for i in order2[-self.n_peak:]:
#print(gamma, px[i], dt, py[i])
seed = dict(t0=px[i], dt=dt, gamma=gamma)
seeds.append(seed)
#print('\n'.join(map(str, seeds)))
return seeds, []