# Common input & data reading routines for the electron ID
#
# m.mieskolainen@imperial.ac.uk, 2024
import copy
import numpy as np
from importlib import import_module
import awkward as ak
from icenet.tools import io
from icenet.tools import aux
from icenet.tools import prints
from icenet.tools import iceroot
# ------------------------------------------
from icenet import print
# ------------------------------------------
# Globals
from configs.brem.mctargets import *
from configs.brem.filter import *
from configs.brem.cuts import *
# [docs]  -- stray Sphinx "view source" anchor from an HTML docs scrape; commented out (as a bare expression it would raise NameError at import)
def load_root_file(root_path, ids=None, entry_start=0, entry_stop=None, maxevents=None, args=None):
    """ Loads the root files

    Args:
        root_path:   path to root files (a one-element list is unwrapped)
        ids:         columnar variables string (list)
        entry_start: first tree entry to read
        entry_stop:  last tree entry to read
        maxevents:   maximum number of events to read
        args:        steering arguments dictionary

    Returns:
        dict with:
            X:    jagged columnar data
            Y:    class labels
            W:    event weights
            ids:  columnar variables string (list)
            info: trigger, MC xs, pre-selection acceptance x efficiency information (dict)
    """
    inputvars = import_module(f'configs.{args["rootname"]}.{args["inputvars"]}')

    if type(root_path) is list:
        root_path = root_path[0]  # Remove [] list

    # ----------
    # Common reader parameters shared by every input class
    read_param = {
        "tree":        "ntuplizer/tree",
        "entry_start": entry_start,
        "entry_stop":  entry_stop,
        "maxevents":   maxevents,
        "args":        args,
        "load_ids":    inputvars.LOAD_VARS
    }

    INFO, X, Y, W = {}, {}, {}, {}

    # ----------
    # One entry per input class from the yamlgen generated yml
    for key, proc in args["input"].items():
        label = int(key.split("_")[1])  # class id encoded in the key suffix

        X[key], Y[key], W[key], ids, INFO[key] = iceroot.read_multiple(
            class_id     = label,
            process_func = process_root,
            processes    = proc,
            root_path    = root_path,
            param        = read_param,
            num_cpus     = args['num_cpus'])

    # ----------
    # Merge the per-class pieces and shuffle
    X = ak.concatenate(X.values(), axis=0)
    Y = ak.concatenate(Y.values(), axis=0)
    W = ak.concatenate(W.values(), axis=0)

    order = np.random.permutation(len(X))  # Randomize order, crucial!
    X, Y, W = X[order], Y[order], W[order]

    # ----------
    print(f'Event counts per class')
    labels, counts = np.unique(Y, return_counts=True)
    print(np.asarray((labels, counts)).T)

    return {'X':X, 'Y':Y, 'W':W, 'ids':ids, 'info': INFO}
# [docs]  -- stray Sphinx "view source" anchor from an HTML docs scrape; commented out (as a bare expression it would raise NameError at import)
def process_root(X, args, ids=None, isMC=None, return_mask=False, class_id=None, **kwargs):
    """
    Apply filter and cut selections to the events.

    Args:
        X:           jagged columnar event data
        args:        steering arguments; 'filterfunc' and 'cutfunc' name module-level
                     functions (brought in via the configs wildcard imports)
        ids:         columnar variable names (passed through unchanged)
        isMC:        Monte Carlo / data flag, forwarded to the filter function
        return_mask: if True, return only the combined boolean selection mask
        class_id:    class label, forwarded to the filter function

    Returns:
        (X_final, ids, stats) when return_mask is False, where stats holds
        before/after event counts per selection stage; otherwise a boolean
        numpy mask over the ORIGINAL events (True where both selections pass)
    """
    FILTERFUNC = globals()[args['filterfunc']]
    CUTFUNC    = globals()[args['cutfunc']]

    stats = {'filterfunc': None, 'cutfunc': None}

    # @@ Filtering done here @@
    fmask = FILTERFUNC(X=X, isMC=isMC, class_id=class_id, xcorr_flow=args['xcorr_flow'])

    # Hoist the reductions -- previously len()/sum() were re-evaluated per use
    N_tot, N_pass = len(X), sum(fmask)
    stats['filterfunc'] = {'before': N_tot, 'after': N_pass}
    # 1E-12 guards against division by zero on empty input
    print(f'isMC = {isMC} | <filterfunc> before: {N_tot}, after: {N_pass} events ({N_pass/(N_tot+1E-12):0.6f})', 'green')
    prints.printbar()

    X_new = X[fmask]

    # @@ Observable cut selections done here @@
    cmask = CUTFUNC(X=X_new, xcorr_flow=args['xcorr_flow'])

    N_tot, N_pass = len(X_new), sum(cmask)
    stats['cutfunc'] = {'before': N_tot, 'after': N_pass}
    print(f"isMC = {isMC} | <cutfunc> before: {N_tot}, after: {N_pass} events ({N_pass/(N_tot+1E-12):0.6f}) \n", 'green')
    prints.printbar()
    io.showmem()

    X_final = X_new[cmask]

    if not return_mask:  # idiomatic form (was '== False')
        return X_final, ids, stats

    # Fold the cut mask back onto the original event indexing:
    # cmask is evaluated only for entries where fmask == True
    fmask_np = fmask.to_numpy()
    fmask_np[fmask_np] = cmask
    return fmask_np
# [docs]  -- stray Sphinx "view source" anchor from an HTML docs scrape; commented out (as a bare expression it would raise NameError at import)
def splitfactor(x, y, w, ids, args):
    """
    Transform data into different datatypes.

    Args:
        x:    jagged columnar data
        y:    class labels
        w:    event weights
        ids:  columnar variable names
        args: steering arguments dictionary

    Returns:
        dictionary with different data representations
    """
    print(f'Transform data into different datatypes', 'green')

    # ----------
    # Init
    inputvars = import_module("configs." + args["rootname"] + "." + args["inputvars"])

    data = io.IceXYW(x=x, y=y, w=w, ids=ids)

    if data.y is not None:
        data.y = ak.to_numpy(data.y).astype(np.float32)
    if data.w is not None:
        data.w = ak.to_numpy(data.w).astype(np.float32)

    # ----------
    # Pick active scalar variables out
    # SECURITY/IDIOM: getattr instead of eval -- args['inputvar_scalar'] is a plain
    # attribute name of the inputvars module; eval would execute arbitrary config text
    scalar_vars = aux.process_regexp_ids(
        all_ids=aux.unroll_ak_fields(x=x, order='first'),
        ids=getattr(inputvars, args['inputvar_scalar'])
    )

    # ----------
    # Extract active kinematic variables (kept separately from training inputs)
    data_kin = None

    if inputvars.KINEMATIC_VARS is not None:
        kinematic_vars = aux.process_regexp_ids(
            all_ids=aux.unroll_ak_fields(x=x, order='first'),
            ids=inputvars.KINEMATIC_VARS
        )
        data_kin     = copy.deepcopy(data)
        data_kin.x   = aux.ak2numpy(x=data.x, fields=kinematic_vars)
        data_kin.ids = kinematic_vars

    # ----------
    # Convert awkward array to 2D numpy array (missing entries imputed)
    if data.x is not None:
        data.x   = aux.ak2numpy(x=data.x, fields=scalar_vars, null_value=args['imputation_param']['fill_value'])
        data.ids = copy.deepcopy(scalar_vars)

    # ----------
    # Representations not used by this analysis
    data_MI     = None  # Mutual information
    data_deps   = None  # DeepSets
    data_tensor = None  # Tensor
    data_graph  = None  # Graph

    # ----------
    return {
        'data': data,
        'data_MI': data_MI,
        'data_kin': data_kin,
        'data_deps': data_deps,
        'data_tensor': data_tensor,
        'data_graph': data_graph
    }