# Source code for icefit.abcd

# Minimal "ABCD" estimator and its confidence intervals
#
# Run tests with: pytest icefit/abcd.py -rP
#
# m.mieskolainen@imperial.ac.uk, 2023

import copy
import numpy as np
import scipy
import scipy.stats as stats
import numba

from icefit.cortools import prc_CI


def ABCD_eq(b, c, d):
    """
    Closed-form ABCD estimate of the count in region 'A' (signal domain).

    When the two variables (X,Y) spanning the plane are independent,
    the four region counts satisfy

        A = B x C / D

    Region layout:

        Y
        |C | A
        |-----
        |D | B
        0------> X

    Args:
        b,c,d : counts in regions B, C and D (scalars or arrays)

    Returns:
        estimate of the count in region A
    """
    # Multiply first, then divide: same operation order as the formula
    numerator = b * c
    return numerator / d
def ABCD_err(b, c, d, method='errorprop', N=int(1e5), alpha=0.32, lrange=5):
    """
    ABCD uncertainty (confidence interval) methods

    DEFINITION:  A = B x C / D

    Args:
        b,c,d  : input counts of regions B, C and D
        method : 'errorprop'                     analytical error propagation
                 'poisson_prc'   (or 'poisson')   Poisson MC + percentiles
                 'bootstrap_prc' (or 'bootstrap') percentile bootstrap
                 'likelihood'                    likelihood scan
        N      : number of random samples for sampling based methods
        alpha  : 1 - confidence level, e.g. 0.32 for CL68
                 (not used by 'errorprop', which is fixed at 1 sigma)
        lrange : likelihood scan range; each scanned parameter runs over
                 [best fit / lrange, best fit * lrange]

    Returns:
        confidence interval array (lower,upper)

    Raises:
        ValueError : unknown method, or a likelihood scan in which no grid
                     point falls inside the confidence region
    """

    # Analytical error propagation (1st order Taylor expansion),
    # using Poisson variances var(b)=b, var(c)=c, var(d)=d
    if method == 'errorprop':

        sigma = np.sqrt((c/d)**2 * b + (b*c/d**2)**2 * d + (b/d)**2 * c)
        A = ABCD_eq(b=b, c=c, d=d)

        # Return symmetric (Gaussian like) 1 sigma (68%) confidence interval
        return np.array([A - sigma, A + sigma])

    # Poisson MC error propagation
    if method in ('poisson_prc', 'poisson'):

        B_new = np.random.poisson(lam=b, size=N)
        C_new = np.random.poisson(lam=c, size=N)
        D_new = np.random.poisson(lam=d, size=N)

        A = ABCD_eq(b=B_new, c=C_new, d=D_new)

        return prc_CI(x=A, alpha=alpha)

    # Efron's percentile bootstrap
    if method in ('bootstrap_prc', 'bootstrap'):

        # Re-generate input data as one label per event
        # (N.B. this could be generalized to weighted data)
        T = [1, 2, 3]  # integer labels ~ B,C,D
        data = np.concatenate((T[0]*np.ones(int(b)),
                               T[1]*np.ones(int(c)),
                               T[2]*np.ones(int(d))), axis=None)
        n_events = len(data)

        # Generate bootstrap samples
        A_new = np.zeros(N)
        for i in range(N):
            # Resample the labels directly (with replacement); no need to
            # build an index array first
            bs = np.random.choice(data, size=n_events, replace=True)

            # np.sum keeps the counts as NumPy integers
            A_new[i] = ABCD_eq(b=np.sum(bs == T[0]),
                               c=np.sum(bs == T[1]),
                               d=np.sum(bs == T[2]))

        return prc_CI(x=A_new, alpha=alpha)

    if method == 'likelihood':

        def optfunc(theta):
            return ABCD_2NLL(B=b, C=c, D=d, mu=theta[0], mu_B=theta[1], mu_D=theta[2])

        # Initial guess and optimize (reservation for more general models)
        theta0 = np.array([ABCD_eq(b=b, c=c, d=d) / b, b, d])
        res = scipy.optimize.minimize(optfunc, theta0, method='Nelder-Mead', tol=1e-6)

        # ----------------------------------------------------------------
        # Likelihood scan over A = mu * mu_B product plane
        x0 = np.linspace(res['x'][0]/lrange, res['x'][0]*lrange, int(1e3))
        x1 = np.linspace(res['x'][1]/lrange, res['x'][1]*lrange, int(1e3))

        # keep (mu_D = theta[2]) to its optimal value (~profiled)
        theta = np.array(copy.deepcopy(res['x']))

        # Acceptance threshold on -2logL, constant over the whole scan
        chi2cut = stats.chi2.ppf(1 - alpha, df=1)  # note NDF=1 (we have a product)
        threshold = res['fun'] + chi2cut

        values = []
        for mu in x0:
            for mu_B in x1:
                theta[0] = mu
                theta[1] = mu_B

                if optfunc(theta) < threshold:
                    values.append(mu * mu_B)  # A = mu * mu_B

        # Fail with a clear message instead of an opaque empty-array
        # reduction error from np.min
        if not values:
            raise ValueError('ABCD_err: likelihood scan found no grid point '
                             'inside the confidence region')

        values = np.array(values)

        return np.array([np.min(values), np.max(values)])

    raise ValueError(f'ABCD_err: Unknown method {method}')
@numba.njit
def ABCD_2NLL(B, C, D, mu, mu_B, mu_D, EPS=1e-32):
    """
    ABCD estimators negative log-likelihood function (times two)

    DEFINITION:  A = B x C / D

    Args:
        B,C,D          : Measured event counts
        mu, mu_B, mu_D : Free parameters of the likelihood function
                         (N.B. here number of measurements == number of free parameters)
        EPS            : Lower clamp for the expected blinded total

    Model relation:
        Ntot = mu*mu_B + mu_B + mu*mu_D + mu_D
             =   (A)   +  (B) +   (C)   +  (D)

    Only the B, C and D terms enter the function below; the expected count
    in C is mu*mu_D.

    See e.g. https://twiki.cern.ch/twiki/pub/Main/ABCDMethod/ABCDGuide_draft18Oct18.pdf

    Returns:
        -2logL
    """
    # Expected count in region C
    mu_C = mu * mu_D

    # Expected total over the regions B, C, D, clamped from below by EPS
    n_tot = mu_B + mu_C + mu_D
    if n_tot < EPS:
        n_tot = EPS

    # Term for the total count, followed by one term per region fraction
    loglike = (B + C + D) * np.log(n_tot) - n_tot
    loglike = loglike + B * np.log(mu_B / n_tot)
    loglike = loglike + C * np.log(mu_C / n_tot)
    loglike = loglike + D * np.log(mu_D / n_tot)

    return -2*loglike
def test_abcd():
    """
    Check every ABCD_err method against a reference interval.

    For the inputs B = 100, C = 5, D = 50 the point estimate is A = 10;
    each method's interval bounds must agree with [5, 15] within an
    absolute tolerance of 1.5.
    """
    import pytest

    tolerance = 1.5

    # ---------------------------
    # INPUT DATA
    B, C, D = 100, 5, 50
    # ---------------------------

    A = ABCD_eq(b=B, c=C, d=D)

    # Uncertainty estimates
    alpha = 1 - 0.68  # one minus the confidence level

    report = (
        'INPUT',
        f' B = {B}, C = {C}, D = {D}',
        '',
        'ALGEBRAIC ESTIMATE',
        f' A = B x C / D = {A:0.1f}',
        '',
        f'CONFIDENCE INTERVAL on A (alpha = {alpha:0.2f})',
    )
    for line in report:
        print(line)

    for method in ('errorprop', 'poisson_prc', 'bootstrap_prc', 'likelihood'):
        CI = ABCD_err(b=B, c=C, d=D, method=method, alpha=alpha)
        lower, upper = CI[0], CI[1]
        print(f' {method:15s} CI = [{lower:0.2f}, {upper:0.2f}]')

        # Test
        assert 5 == pytest.approx(lower, abs=tolerance)
        assert 15 == pytest.approx(upper, abs=tolerance)