Source code for xpipeline.utils.utils

import numpy
import itertools
import random

def choose_background_injection_training(f, injection_type='onsource_injection',
                                         randomseed=1986,):
    """Split background and injection events into training/validation halves.

    Parameters
    ----------
    f : file-like HDF5 handle
        Object providing ``list_nodes``, ``get_node`` and ``walk_groups``
        whose nodes expose ``_v_pathname``, ``_v_name`` and ``_v_children``
        (presumably a PyTables ``File`` -- confirm against callers).
    injection_type : str, optional
        Name of the root group that holds the injections.
    randomseed : int, optional
        Seed applied to both ``numpy.random`` and ``random`` so that the
        split is reproducible.

    Returns
    -------
    training_background_events, validation_background_events
        Arrays of the ``internal_slide`` groups found under ``/background``,
        or two empty lists when no background events could be read.
    all_training_injection_events, all_validation_injection_events
        Lists of injection path strings of the form
        ``/<injection_type>/<event>/<waveform>/<scale>/<injection>``, or two
        empty lists when the injections could not be read.

    Notes
    -----
    Missing or unreadable data is reported with ``print`` and yields empty
    results rather than an exception.  The injection split is all-or-nothing:
    a failure on any injection event discards the injections of every event.
    """
    # Seed both generators so the split is reproducible for a given seed.
    numpy.random.seed(randomseed)
    random.seed(randomseed)

    # Injections are drawn before background so that, for a given seed, the
    # order in which random numbers are consumed never changes.
    try:
        (all_training_injection_events,
         all_validation_injection_events) = _split_injection_paths(
             f, injection_type)
    except Exception:
        print('This file does not have any injections will return no training or testing injection events')
        all_training_injection_events = []
        all_validation_injection_events = []

    try:
        (training_background_events,
         validation_background_events) = _split_background_events(f)
    except Exception:
        print('This file does not have any background maps, will return no training or testing background events')
        training_background_events = []
        validation_background_events = []

    return (training_background_events, validation_background_events,
            all_training_injection_events, all_validation_injection_events)


def _split_half_indices(count, minimum_training=0):
    """Randomly split ``range(count)`` into training and validation indices."""
    training_size = max(int(0.5 * count), minimum_training)
    # dtype=int keeps an empty selection usable as an index array; without it
    # numpy builds an empty float array, which cannot be used for indexing.
    training_idx = numpy.array(
        random.sample(range(count), training_size), dtype=int)
    # Everything that was not sampled for training is used for validation.
    validation_idx = numpy.setdiff1d(numpy.arange(count), training_idx)
    return training_idx, validation_idx


def _split_injection_paths(f, injection_type):
    """Return (training, validation) injection path lists for every event."""
    injection_root = '/' + injection_type
    injection_group = next(
        (node for node in f.list_nodes('/')
         if node._v_pathname == injection_root),
        None)
    if injection_group is None:
        raise LookupError('no {0} group in file'.format(injection_root))

    # The waveform and injection-scale names are read from the first event
    # (and its first waveform) and reused for every other event.
    injection_events = list(injection_group._v_children.keys())
    first_event = f.get_node(
        '/{0}/{1}'.format(injection_type, injection_events[0]))
    waveforms = list(first_event._v_children.keys())
    first_waveform = f.get_node(
        '/{0}/{1}/{2}'.format(injection_type, injection_events[0],
                              waveforms[0]))
    inj_scales = list(first_waveform._v_children.keys())

    training_paths = []
    validation_paths = []
    for injection_event in injection_events:
        # Injection names are read from the first waveform/scale of the event.
        injection_node = f.get_node(
            '/{0}/{1}/{2}/{3}'.format(injection_type, injection_event,
                                      waveforms[0], inj_scales[0]))
        injections = numpy.asarray(list(injection_node._v_children.keys()))

        # At least one injection per event always goes to training.
        training_idx, validation_idx = _split_half_indices(
            injections.size, minimum_training=1)

        for chosen_idx, paths in ((training_idx, training_paths),
                                  (validation_idx, validation_paths)):
            path_parts = [[injection_root], [injection_event], waveforms,
                          inj_scales, injections[chosen_idx].tolist()]
            paths.extend(
                '/'.join(parts) for parts in itertools.product(*path_parts))

    return training_paths, validation_paths


def _split_background_events(f):
    """Return (training, validation) arrays of background slide groups."""
    background_events = numpy.asarray(
        [group for group in f.walk_groups('/background')
         if 'internal_slide' in group._v_name])
    number_of_background_events = background_events.size
    if number_of_background_events == 0:
        raise LookupError('no internal_slide groups under /background')

    training_idx, validation_idx = _split_half_indices(
        number_of_background_events)
    return background_events[training_idx], background_events[validation_idx]