Coverage for estats/likelihood.py: 70%
1# Copyright (C) 2019 ETH Zurich,
2# Institute for Particle Physics and Astrophysics
3# Author: Dominik Zuercher
5import os
6import pandas as pd
7import numpy as np
8import copy
9from ekit import context
10import ekit.paths as paths
11import scipy
12import scipy.special
import scipy.linalg
13from estats import utils
14import pickle
15from scipy.interpolate import LinearNDInterpolator
16from ekit import logger
17from tqdm import tqdm
18# import numba as nb
19LOGGER = logger.init_logger(__name__)
21# optional imports
22try:
23 from sklearn.decomposition import PCA
24 PCA_enable = True
25except ImportError:
26 LOGGER.warning("Did not find sklearn installation. "
27 "Cannot use PCA.")
28 PCA_enable = False
29try:
30 from sklearn.gaussian_process import GaussianProcessRegressor
31 import sklearn.gaussian_process.kernels as kernels
32 GPR_enable = True
33except ImportError:
34 LOGGER.warning("Did not find sklearn installation. "
35 "Cannot use GPR emulator mode.")
36 GPR_enable = False
37try:
38 import tensorflow as tf
39 LOGGER.debug(f"TensorFlow version: {tf.__version__}")
40 from tensorflow.keras.layers import Dense
41 from tensorflow.keras.models import Sequential
42 from tensorflow.keras.layers import Dropout
43 NN_enable = True
44except ImportError:
45 LOGGER.warning("Did not find tensorflow installation. "
46 "Cannot use NN emulator mode.")
47 NN_enable = False
48try:
49 from pypolychord.priors import UniformPrior
50 from pypolychord.priors import GaussianPrior
51 poly_enable = True
52except ImportError:
53 LOGGER.warning(
54 "Did not find polychord installation. "
55 "Cannot evaluate prior in polychord mode "
56 "without polychord being installed")
57 poly_enable = False
60class likelihood:
62 """
63 Class meant to perform parameter inference based on predictions
64 of the data-vectors and covariance matrices at different parameter
65 configurations.
67 The main functionality is to calculate the negative logarithm of the
68 likelihood at a given parameter configuration given a measurement
69 data-vector.
71 The parameter space is broken down into two parts, called parameters and
72 nuisances, which are treated differently.
74 For the parameter part it is assumed that the space is sampled densely with
75 simulations and an emulator is built from these simulations.
77 For the nuisances it is assumed that only delta simulations at
78 the fiducial parameter configuration are available, in which only this
79 one parameter is varied. In this case polynomial scaling
80 relations are fitted for each bin of the data vector and are used
81 to describe the influence of the parameter when predicting the statistic.
82 This implicitly assumes that these parameters are independent of all
83 other parameters.
85 The data vectors can also be compressed using PCA or MOPED compression.
87 The most important functionalities are:
89 - readin_interpolation_data:
91 Loads data used for interpolation. The data is expected to be in a
92 format as used by the estats.summary module.
94 - convert_to_PCA_space:
96 Builds PCA compression and converts all data vectors to PCA space.
97 All output will be in PCA space afterwards.
99 - convert_to_MOPED_space:
101 Builds MOPED compression and converts all data vectors to MOPED space.
102 Requires emulator to be built before.
103 All output will be in MOPED space afterwards.
105 - build_emulator:
107 Builds the emulator for the parameter space used to interpolate
108 the expected data-vectors between different
109 parameter configurations. There are three different choices
110 for the type of interpolator used at the moment:
111 linear: Uses N-Dimensional linear interpolator
112 GPR: Gaussian Process Regressor
113 NN: Neural network
115 - build_scalings:
117 Builds the polynomial scaling relations for the nuisance parameters
118 individually for each
119 data bin. A polynomial function is fitted for each bin and each
120 nuisance parameter.
122 - get_neg_loglikelihood:
124 Returns negative logarithmic likelihood given a measurement data-vector
125 at the location in parameter space indicated.
127 The accepted keywords are:
129 - statistic:
131 default: Peaks
133 choices: name of one of the statistic plugins
135 Decides which statistic plugin to use. In the likelihood module only
136 the filter function is used from the plugin.
138 - parameters:
140 default: [Om, s8]
142 choices: list of strings
144 The names of the parameters to consider
146 - parameter_fiducials:
148 default: [0.276, 0.811]
150 choices: list of floats
152 The default values of the parameters.
153 Used to decide on the fiducial covariance matrix if no interpolation
154 of the covariance matrix is used.
156 - nuisances:
158 default: [IA, m, z]
160 choices: list of strings
162 The names of the nuisance parameters to consider
164 - nuisance_fiducials:
166 default: [0.0, 0.0, 0.0]
168 choices: list of floats
170 The default values of the nuisance parameters.
171 Used to decide on the fiducial covariance matrix if no interpolation
172 of the covariance matrix is used.
174 - n_tomo_bins:
176 default: 3
178 choices: integer
180 The number of tomographic bins considered. Only needed if the special
181 emulator is used or a statistic with Cross in its name.
183 - cross_ordering:
185 default: []
187 choices: a list of labels
189 Indicates the order of the tomographic bin labels that is assumed in
190 the filter function.
192 The labels could be bin1xbin2 for example, and the corresponding
193 cross_ordering could be [1x1, 1x2, 2x2, 2x3, 3x3].
195 - multi_bin:
197 default: [False, True, True]
199 choices: A boolean list
201 Indicates if a nuisance parameter in nuisances is a global parameter
202 (False) or a corresponding parameter should be introduced for each
203 tomographic bin (True).
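Example (a minimal usage sketch; the file names, the parameter list
param_values and the measured data vector measurement are placeholders,
not part of the module):

    like = likelihood({'statistic': 'Peaks'})
    like.readin_interpolation_data(
        'means.npy', meta_path='meta.npy',
        fiducial_data_path='fiducial.npy')
    like.build_emulator(interpolation_mode='linear')
    like.build_scalings()
    # param_values: parameters followed by nuisances (expanded per
    # tomographic bin where multi_bin is True)
    neg_logl = like.get_neg_loglikelihood(param_values, measurement)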
204 """
206 def __init__(self, context_in={}, verbosity=3, **kwargs):
207 """
208 Initializes likelihood instance
210 :param context_in: Context instance
211 :param verbosity: Verbosity level (0-4)
212 """
214 logger.set_logger_level(LOGGER, verbosity)
216 LOGGER.debug("Initializing likelihood object")
218 # setup context
219 types = ['int', 'list', 'list', 'list',
220 'list', 'list', 'str', 'list', 'list']
222 defaults = [3,
223 ['Omega0', 'Sigma8'], [0.276, 0.811],
224 ['IA', 'm', 'z'], [0.0, 0.0, 0.0],
225 [['flat', 0.07, 0.55], ['flat', 0.47, 1.2],
226 ['flat', -5.5, 5.5],
227 ['flat', -0.1, 0.1], ['flat', -0.1, 0.1]],
228 'Peaks', [], [False, True, True]]
230 allowed = ['n_tomo_bins', 'parameters',
231 'parameter_fiducials', 'nuisances', 'nuisance_fiducials',
232 'prior_configuration', 'statistic',
233 'cross_ordering', 'multi_bin']
235 allowed, types, defaults = utils._get_plugin_contexts(
236 allowed, types, defaults)
237 self.ctx = context.setup_context(
238 {**context_in, **kwargs}, allowed, types, defaults)
240 # GET OBJECTS
241 def get_meta(self, params=None):
242 """
243 Access the meta data table
245 :param params: Dictionary or list of parameters for filtering.
246 :return: (Filtered) meta data
247 """
249 if params is None:
250 meta = self.META[:]
251 else:
252 dict = self._convert_dict_list(params, 'dict')
253 idx = self._get_loc(dict)
254 meta = self.META[idx]
255 if self.input_normalizer is not None:
256 LOGGER.debug("Applying normalization")
257 meta = self.input_normalizer(meta)
258 return meta
260 def get_means(self, params=None):
261 """
262 Access the mean data vectors stored.
264 :param params: Dictionary or list of parameters for filtering.
265 :return: Original means
266 """
267 if params is None:
268 means = self.MEANS
269 else:
270 dict_ = self._convert_dict_list(params, 'dict')
272 dict = {}
273 for d in dict_.items():
274 if (d[0] in self.ctx['parameters']) \
275 | (d[0] in self.ctx['nuisances']):
276 dict[d[0]] = d[1]
277 idx = self._get_loc(dict)
278 means = self.MEANS[idx, :]
279 means = self.transform_datavectors(means)
280 return means
282 def get_interp_means(self, params):
283 """
284 Predict interpolated datavector at a parameter configuration.
286 :param params: Dictionary or list of parameters for filtering.
287 :return: Interpolated data vector
288 """
289 list = self._convert_dict_list(params, 'list')
290 mean = self._get_interp_data_vector(list)
291 return mean
293 # METHODS
294 def transform_datavectors(self, vecs):
295 """
296 Applies all the available modifications to raw data vectors,
297 meaning normalization, filtering, PCA and MOPED compression
298 in this exact order.
300 :param vecs: Raw data vectors that should be transformed.
301 :return: The transformed data vectors
302 """
304 vecs = np.atleast_2d(vecs)
305 vecs_out = np.zeros(
306 (vecs.shape[0], self._transform_datavector(vecs[0]).shape[1]))
307 for ii, vec in enumerate(vecs):
308 vecs_out[ii] = self._transform_datavector(vec)
309 return vecs_out
311 def clear_redundant_data(self):
312 """
313 Clear out large arrays that are no longer needed after emulator and
314 interpolator are built.
315 """
316 self.MEANS = []
317 self.META = []
318 self.ERRORS = []
319 self.FIDUCIAL_DATA = []
320 LOGGER.debug("Deleted MEANS, META, ERRORS and FIDUCIAL_DATA")
322 def convert_to_PCA_space(self, PCA_cache_path=None, inc_var=99.999,
323 truncate=0, build_precision_matrix=False,
324 check_inversion=True, neglect_correlation=False,
325 partial_PCA=False):
326 """
327 Builds PCA compression matrix.
328 All results are returned in PCA space afterwards.
330 :param PCA_cache_path: Alternatively provide PCA basis directly via
331 pickle file. If path does not exist will cache
332 built basis to there.
333 :param inc_var: Amount of variance included in the PCA in percent.
334 :param truncate: Maximum number of PCA components to keep.
335 Overrides inc_var.
336 :param build_precision_matrix: To build precision matrix from
337 fiducial_data_path realisations.
338 :param check_inversion: If True checks numerical stability of inversion
339 of the fiducial covariance matrix
340 :param neglect_correlation: Reduces the covariance matrix to its
341 diagonal
342 :param partial_PCA: If True builds a PCA compression for each
343 statistic otherwise builds a single one for
344 the whole statistic (only used in case of a
345 combined statistic)
346 """
348 if not PCA_enable:
349 raise Exception(
350 "Cannot use PCA compression if sklearn not installed.")
352 if hasattr(self, 'MOPED_matrix'):
353 raise Exception(
354 "Cannot build PCA on top of MOPED compression!")
356 if PCA_cache_path is None:
357 self._build_PCA_basis(
358 truncate=truncate, inc_var=inc_var, partial_PCA=partial_PCA)
359 else:
360 if os.path.exists(PCA_cache_path):
361 LOGGER.info(f"Loading PCA from cache {PCA_cache_path}")
362 with open(PCA_cache_path, 'rb') as f:
363 self.pca = pickle.load(f)
364 else:
365 self._build_PCA_basis(
366 truncate=truncate, inc_var=inc_var,
367 partial_PCA=partial_PCA)
368 LOGGER.info(f"Saving PCA to cache {PCA_cache_path}")
369 with open(PCA_cache_path, 'wb+') as f:
370 pickle.dump(self.pca, f)
372 LOGGER.info("Successfully built PCA")
374 # build precision matrix
375 if build_precision_matrix:
376 self.build_precision_matrix(
377 check_inversion=check_inversion,
378 neglect_correlation=neglect_correlation)
380 def convert_to_MOPED_space(self, check_inversion=True,
381 neglect_correlation=False,
382 build_precision_matrix=True,
383 MOPED_cache_dir='',
384 incremental_values={
385 'Om': 1e-4, 's8': 1e-4,
386 'IA': 1e-1, 'eta': 1e-1,
387 'm': 1e-4, 'z': 1e-4, 'Ob': 1e-4,
388 'ns': 1e-4, 'H0': 1e-1, 'w0': 1e-4}):
389 """
390 Builds MOPED compression matrix.
391 All results are returned in MOPED space afterwards.
393 :param check_inversion: If True checks numerical stability of inversion
394 of the fiducial covariance matrix
395 :param neglect_correlation: Reduces the covariance matrix to its
396 diagonal before inverting.
397 :param build_precision_matrix: To build precision matrix from
398 fiducial_data_path realisations.
399 :param MOPED_cache_dir: Directory used to cache the MOPED compression
400 matrix; if one is found there it is loaded instead of being rebuilt.
401 :param incremental_values: Needed to calculate numerical derivatives.
402 This dictionary defines the incremental
403 values used to calculate the derivatives
404 in each parameter direction.
405 """
407 if os.path.exists(f'{MOPED_cache_dir}/MOPED_matrix.npy'):
408 LOGGER.info(
409 f"Loading MOPED compression matrix from "
410 f"cache {MOPED_cache_dir}/MOPED_matrix.npy.")
411 self.MOPED_matrix = np.load(f"{MOPED_cache_dir}/MOPED_matrix.npy")
412 else:
413 parameters = copy.copy(self.ctx['parameters'])
415 # set fiducial and incremental values to build compression matrix
416 p0_dict = {}
417 d_p0_dict = {}
419 for ii, par in enumerate(self.ctx['parameters']):
420 if par not in incremental_values:
421 raise Exception(
422 f"No incremental value passed for parameter {par}")
423 p0_dict[par] = self.ctx['parameter_fiducials'][ii]
424 d_p0_dict[par] = incremental_values[par]
426 if len(self.ctx['nuisances']) > 0:
427 for ii, par in enumerate(self.ctx['nuisances']):
428 if par not in incremental_values:
429 raise Exception(
430 f"No incremental value passed for parameter {par}")
431 if self.ctx['multi_bin'][ii]:
432 for j in range(len(self.unique_bins)):
433 p = '{}_{}'.format(par, j + 1)
434 parameters += [p]
435 p0_dict[p] = self.ctx['nuisance_fiducials'][ii]
436 d_p0_dict[p] = incremental_values[par]
437 else:
438 parameters += [par]
439 p0_dict[par] = self.ctx['nuisance_fiducials'][ii]
440 d_p0_dict[par] = incremental_values[par]
442 # reset fiducial to IA=0.1 to allow for proper eta scaling
443 if 'IA' in p0_dict.keys():
444 p0_dict['IA'] = 0.1
446 # build MOPED compression matrix
447 self.MOPED_matrix = self._MOPED_compression(
448 parameters, p0_dict, d_p0_dict)
450 # caching
451 if len(MOPED_cache_dir) > 0:
452 paths.mkdir_on_demand(MOPED_cache_dir)
453 np.save(
454 f"{MOPED_cache_dir}/MOPED_matrix.npy", self.MOPED_matrix)
455 LOGGER.info(
456 f"Cached MOPED compression matrix to "
457 f"{MOPED_cache_dir}/MOPED_matrix.npy.")
459 LOGGER.info("Successfully converted to MOPED space")
461 # build precision matrix
462 if build_precision_matrix:
463 self.build_precision_matrix(
464 check_inversion=check_inversion,
465 neglect_correlation=neglect_correlation)
467 def readin_interpolation_data(self, means_path, meta_path='',
468 error_path='',
469 check_inversion=True,
470 fiducial_data_path='', chosen_bins=None,
471 neglect_correlation=False, filter=None,
472 build_precision_matrix=True,
473 reduce_to_selected_scales=True,
474 parameters=None, nuisances=None,
475 normalizer_cache=None,
476 normalize_input=False,
477 normalize_output=False):
478 """
479 Loads data used for interpolation. The data is expected to be in a
480 format as used by the estats.summary module
482 :param means_path: Path to file holding mean datavectors for
483 each cosmology or the data array directly.
484 Shape: (realisations, length of data)
485 :param meta_path: Path to file holding meta data table or the data
486 array directly. Shape: (realisations, number of meta
487 data entries)
488 :param error_path: Path to file holding error vectors or the data
489 array directly. Shape: (realisations, length of
490 data)
491 :param check_inversion: If True checks numerical stability of inversion
492 of the fiducial covariance matrix
493 :param fiducial_data_path: Can readin a file containing realisations
494 at the fiducial parameter setting to build
495 covariance matrix from there.
496 :param chosen_bins: If a list of strings is passed, they indicate the
497 bins that are considered. For example:
498 [1x1, 1x2, 3x4]
499 :param neglect_correlation: Reduces the covariance matrix to its
500 diagonal before inverting.
501 :param filter: Can provide a custom filter function instead of using
502 the one built using the plugins.
503 :param build_precision_matrix: To build precision matrix from
504 fiducial_data_path realisations.
505 :param reduce_to_selected_scales: If True reduces the datavector by
506 applying the filter function in the
507 stats plugin.
508 :param parameters: Reduce parameters to passed set of parameters.
509 :param nuisances: Reduce nuisances to passed set of nuisances.
510 :param normalizer_cache: Path to pickle cache file. Normalizers are
511 loaded from there and saved to there.
512 :param normalize_input: If True input features are normalized.
513 :param normalize_output: If True output features are normalized.
514 """
516 if chosen_bins is not None:
517 bins = chosen_bins.split(',')
518 unique_bins = np.zeros(0)
519 for bin in bins:
520 unique_bins = np.append(unique_bins, int(bin[:2]))
521 unique_bins = np.append(unique_bins, int(bin[-2:]))
522 self.unique_bins = np.unique(unique_bins)
523 LOGGER.debug(
524 f"Got custom bin combination setting. "
525 f"Using only bin combinations {bins}")
526 else:
527 if self.ctx['n_tomo_bins'] == 1:
528 self.unique_bins = [0]
529 else:
530 self.unique_bins = np.arange(1, self.ctx['n_tomo_bins'] + 1)
532 select = None
533 idx_norm = None
535 # load normalizers
536 if normalize_input | normalize_output:
537 if (normalizer_cache is not None):
538 if os.path.exists(normalizer_cache):
539 file = open(normalizer_cache, 'rb')
540 normalizer = pickle.load(file)
541 file.close()
542 if normalize_input:
543 self.input_normalizer = normalizer['input_normalizer']
544 orig_input_normalizer = None
545 else:
546 orig_input_normalizer = normalizer['input_normalizer']
547 self.input_normalizer = None
548 if normalize_output:
549 self.output_normalizer = \
550 normalizer['output_normalizer']
551 orig_output_normalizer = None
552 else:
553 orig_output_normalizer = \
554 normalizer['output_normalizer']
555 self.output_normalizer = None
556 LOGGER.info(
557 f"Using normalizer from cache: {normalizer_cache}")
558 else:
559 self.input_normalizer = None
560 self.output_normalizer = None
561 orig_input_normalizer = None
562 orig_output_normalizer = None
563 else:
564 self.input_normalizer = None
565 self.output_normalizer = None
566 orig_input_normalizer = None
567 orig_output_normalizer = None
568 else:
569 self.input_normalizer = None
570 self.output_normalizer = None
572 if len(meta_path) > 0:
573 if isinstance(meta_path, str):
574 LOGGER.info("Loading meta data from: {}".format(meta_path))
575 META = np.load(meta_path)
576 else:
577 META = meta_path[:]
578 select, idx_norm = self._set_meta(
579 META, parameters, nuisances,
580 normalize_input, normalize_output)
581 else:
582 raise Exception(
583 "Cannot initialize without meta data. Pass meta_path.")
585 # set means
586 if len(means_path) > 0:
587 if isinstance(means_path, str):
588 LOGGER.info(
589 "Loading the mean data vectors from {}".format(means_path))
590 MEANS = np.load(means_path)
591 means_is_path = True
592 else:
593 MEANS = means_path[:]
594 means_is_path = False
595 else:
596 raise Exception(
597 "Cannot initialize without mean datavectors. Pass means_path.")
598 self._set_means(MEANS, select, means_is_path)
600 # set custom filter
601 if filter is not None:
602 self.filter = filter
603 else:
604 self.filter = self._create_filter(
605 chosen_bins=chosen_bins,
606 reduce_to_selected_scales=reduce_to_selected_scales)
607 LOGGER.info(
608 f"Filter set. Keeping {np.sum(self.filter)} "
609 f"out of {len(self.filter)} data vector elements")
611 # readin fiducial data
612 if len(fiducial_data_path) > 0:
613 if isinstance(fiducial_data_path, str):
614 LOGGER.info(
615 "Loading data vector realisations "
616 f"from {fiducial_data_path}")
617 self.FIDUCIAL_DATA = np.load(fiducial_data_path)
618 else:
619 self.FIDUCIAL_DATA = fiducial_data_path[:]
620 else:
621 LOGGER.warning(
622 "No fiducial data vectors passed. Will not be able "
623 "to build covariance matrix or calculate likelihood")
625 if normalize_output:
626 if self.output_normalizer is None:
627 if means_is_path:
628 MEANS_norm = MEANS[idx_norm, :]
629 else:
630 MEANS_norm = MEANS[:, :]
631 self.output_normalizer = Normalizer()
632 self.output_normalizer.adapt(np.array(MEANS_norm))
633 LOGGER.debug("Fitted output normalizer")
635 # cache normalizers
636 if normalize_input | normalize_output:
637 if normalizer_cache is not None:
638 file = open(normalizer_cache, 'wb+')
639 if orig_input_normalizer is not None:
640 input_store = orig_input_normalizer
641 else:
642 input_store = self.input_normalizer
643 if orig_output_normalizer is not None:
644 output_store = orig_output_normalizer
645 else:
646 output_store = self.output_normalizer
648 pickle.dump({'input_normalizer': input_store,
649 'output_normalizer': output_store}, file)
650 file.close()
651 LOGGER.info(f"Cached normalizers to {normalizer_cache}")
653 # build precision matrix
654 if build_precision_matrix:
655 self.build_precision_matrix(
656 check_inversion=check_inversion,
657 neglect_correlation=neglect_correlation)
659 # set errors
660 # (not really needed for likelihood but can be convenient for plotting)
661 if len(error_path) > 0:
662 if isinstance(error_path, str):
663 LOGGER.info(
664 "Loading the error data "
665 "vectors from {}".format(error_path))
666 ERRORS = np.load(error_path)
667 else:
668 ERRORS = error_path
669 self.ERRORS = ERRORS[select, :]
670 LOGGER.debug("Set ERRORS")
672 def build_emulator(self, interpolation_mode='linear', selector=None,
673 GPR_kernel=None, GP_cache_file='', scales=6.1,
674 NN_cache_file='', pos_drop=-1, n_epochs=100,
675 batchsize=32, learning_rate=0.001,
676 droprate=0.2, n_neurons=[32, 64, 128, 256],
677 load_weights=False, activation='gelu', l2reg=-0.01):
678 """
679 Build the interpolator for the parameters used to interpolate
680 between different parameter setups. The interpolator is built over
681 the parameters (nuisances are handled by the scaling relations).
682 The available interpolation methods are linear, GPR (Gaussian process
683 regressor) and NN (neural network).
685 :param interpolation_mode: Either linear (N-Dim linear interpolator),
686 GPR (Gaussian process regressor)
687 or NN (neural network)
688 :param selector: Can provide a boolean array to preselect the mean
689 datavectors to be considered for the construction of
690 the interpolator
691 :param GPR_kernel: Can provide a scikit kernel that is used to
692 build the GPR. By default a RBF
693 kernel is used.
694 :param GP_cache_file: Optional path to a pickle file loading premade
695 GPR interpolators. If a path is provided that
696 does not exist the built interpolator will be
697 cached there.
698 :param scales: Can pass a list of scales for the RBF kernel for the
699 GPR. If scales is passed the RBF(scales, 'fixed') kernel
700 is used; if None an RBF kernel with optimized length
701 scales is used.
702 :param NN_cache_file: Can provide path to a cache file holding
703 pretrained network weights
704 :param pos_drop: Position of dropout layer in the NN.
705 If negative no dropout is used.
706 :param n_epochs: Number of epochs for NN training.
707 :param batchsize: Batchsize used in NN training.
708 :param learning_rate: Adam learning rate used in NN training.
709 :param droprate: Dropout rate used by the dropout layer.
710 :param n_neurons: Number of neurons for each layer.
711 :param load_weights: If True attempts to load pretrained NN weights
712 from the cache file.
713 :param activation: Activation function for the hidden layers.
714 :param l2reg: If positive is interpreted as the fractional contribution
715 of the L2 regularizer of the hidden layers to the loss
716 function.
717 """
719 if (interpolation_mode == 'GPR') & (not GPR_enable):
720 raise Exception(
721 "Cannot use GPR emulator without scikit-learn installation!")
722 if (interpolation_mode == 'NN') & (not NN_enable):
723 raise Exception(
724 "Cannot use NN emulator without tensorflow installation!")
725 LOGGER.info("Building emulator")
727 ########################
728 # Generate training data
729 ########################
731 # use only data realisations where nuisances are set to default
732 params = {}
733 for ii, param in enumerate(self.ctx['nuisances']):
734 params[param] = self.ctx['nuisance_fiducials'][ii]
735 chosen = self._get_loc(params, preselection=selector)
736 LOGGER.debug(f"Using {np.sum(chosen)} inputs to train the emulator")
738 # create the meta data
739 meta = np.zeros((np.sum(chosen), 0))
740 for param in self.ctx['parameters']:
741 meta = np.hstack((meta, self.META[param][chosen].reshape(-1, 1)))
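# embed the varied parameters into copies of the fiducial parameter
# vector; parameters that are not varied keep their fiducial values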
742 meta_train = np.repeat(
743 self.original_parameter_fiducials.reshape(1, -1),
744 meta.shape[0], axis=0)
745 meta_train[:, self.valid_parameter_positions] = meta
747 # normalize meta
748 if self.input_normalizer is not None:
749 meta_train = self.input_normalizer(meta_train)
750 LOGGER.info("Applied input normalization")
752 # reduce to required number of parameters
753 if not (load_weights & (interpolation_mode == 'NN')):
754 meta_train = meta_train[:, self.valid_parameter_positions]
755 if len(meta_train.shape) == 1:
756 meta_train = meta_train.reshape(-1, 1)
758 # create interpolation data
759 data_train = self.transform_datavectors(self.MEANS[chosen, :])
761 if interpolation_mode == 'linear':
762 LOGGER.info("Building Linear ND interpolator")
763 self.interpolator = LinearNDInterpolator(meta_train, data_train)
765 elif interpolation_mode == 'GPR':
766 LOGGER.info("Building GPR emulator")
767 # check if cached GPR is available
768 if (len(GP_cache_file) > 0) & (os.path.exists(GP_cache_file)):
769 LOGGER.info(
770 f"Using cached GPR emulator from file {GP_cache_file}")
771 # load cache
772 with open(GP_cache_file, 'rb') as f:
773 self.interpolator = pickle.load(f)
774 else:
775 # build GPR
776 if GPR_kernel is not None:
777 kernel = copy.deepcopy(GPR_kernel)
778 LOGGER.info("Training GPR with custom kernel.")
779 elif scales is not None:
780 kernel = kernels.RBF(scales, 'fixed')
781 LOGGER.info("Training GPR with RBF kernel.")
782 else:
783 kernel = kernels.RBF()
784 LOGGER.info("Training GPR with RBF kernel.")
785 gpc = GaussianProcessRegressor(
786 kernel=kernel, n_restarts_optimizer=10,
787 normalize_y=True, copy_X_train=False)
788 gpc.fit(meta_train, data_train)
789 self.interpolator = gpc
791 # caching
792 if len(GP_cache_file) > 0:
793 if not os.path.exists(GP_cache_file):
794 with open(GP_cache_file, 'wb+') as f:
795 pickle.dump(self.interpolator, f)
796 LOGGER.info(
797 f"Caching GPR emulator to file {GP_cache_file}")
799 elif interpolation_mode == 'NN':
800 LOGGER.info("Building NN emulator")
802 def gen_nn(activation, pos_drop, droprate,
803 n_neurons, n_bins, meta, l2reg_rate):
804 l2reg = tf.keras.regularizers.L2(l2=l2reg_rate)
805 model = Sequential()
806 for jj in range(len(n_neurons)):
807 if jj == pos_drop:
808 model.add(Dropout(droprate))
809 model.add(Dense(n_neurons[jj], activation=activation,
810 kernel_regularizer=l2reg))
811 model.add(Dense(n_bins))
813 model.compile()
814 model(meta, training=False)
815 return model
817 if os.path.exists(NN_cache_file):
818 self.interpolator = []
820 # load all the weights
821 model = gen_nn(
822 activation, pos_drop, droprate, n_neurons,
823 data_train.shape[1],
824 tf.convert_to_tensor(
825 np.zeros((1, meta_train.shape[1]))), l2reg)
826 if load_weights:
827 model.load_weights(NN_cache_file)
828 self.interpolator = model
829 LOGGER.info(f"Loaded pretrained NN from {NN_cache_file}")
830 else:
831 LOGGER.info("Training NN on full dataset.")
832 model = gen_nn(
833 activation, pos_drop, droprate, n_neurons,
834 data_train.shape[1],
835 tf.convert_to_tensor(
836 np.zeros((1, meta_train.shape[1]))), l2reg)
837 custom_loss = tf.keras.losses.MeanSquaredError()
838 optimizer = tf.keras.optimizers.Adam(
839 learning_rate=learning_rate)
840 train_loss = tf.keras.metrics.Mean(name='train_loss')
841 train_ds = tf.data.Dataset.from_tensor_slices(
842 (meta_train, data_train)).shuffle(10000).batch(batchsize)
844 train_losses = []
845 for epoch in tqdm(range(n_epochs)):
846 # Reset the metrics at the start of the next epoch
847 train_loss.reset_states()
849 # Training. loop over batches
850 for features, labels in train_ds:
851 with tf.GradientTape() as tape:
852 predictions = model(features, training=True)
853 loss = custom_loss(labels, predictions)
855 gradients = tape.gradient(loss, model.trainable_variables)
856 # train weight
857 optimizer.apply_gradients(
858 zip(gradients, model.trainable_variables))
860 # calculate training loss without dropout
861 for features, labels in train_ds:
862 predictions = model(features, training=False)
863 loss = custom_loss(labels, predictions)
864 train_loss.update_state(loss)
865 train_losses.append(train_loss.result())
867 self.interpolator = model
868 LOGGER.info(f"Finalized NN. Final training loss "
869 f"{np.sqrt(train_losses[-1])} sigma")
870 if len(NN_cache_file) > 0:
871 # caching
872 model.save_weights(NN_cache_file)
873 else:
874 raise ValueError(
875 f"Invalid value for parameter "
876 f"interpolation_mode: {interpolation_mode}")
877 self.interpolation_mode = interpolation_mode
879 def build_scalings(self, selector=None,
880 fitting='2d-poly'):
881 """
882 Builds the scaling relations for the nuisance parameters individually
883 for each data bin.
884 Fits a polynomial for each nuisance parameter individually
885 (assuming each nuisance
886 parameter to be independent of the other ones and of the parameters).
888 :param selector: Can provide a boolean array to preselect the mean
889 datavectors to be considered for the construction of
890 the emulator.
891 :param fitting: Method used for fitting. Format is nd-poly,
892 where n indicates the degree of the
893 polynomial to fit.
894 """
895 if len(self.ctx['nuisances']) == 0:
896 LOGGER.warning(
897 "Cannot build scalings if no nuisances passed. Skipping...")
898 self.emulators = {}
899 self.emulators['fiducial'] = {}
900 # Build the fiducial emulators
901 for idn, nuis in enumerate(self.ctx['nuisances']):
902 LOGGER.debug(f"Building scaling relation for nuisance {nuis}")
903 self.emulators['fiducial'][nuis] = []
904 # choose reference run at fiducial cosmology
905 chosen = [True] * self.META.shape[0]
906 for idp, param in enumerate(self.ctx['parameters']):
907 idx = np.isclose(
908 self.META[param], self.ctx['parameter_fiducials'][idp])
909 chosen = np.logical_and(chosen, idx)
910 for idp, param in enumerate(self.ctx['nuisances']):
911 idx = np.isclose(
912 self.META[param],
913 self.ctx['nuisance_fiducials'][idp])
914 chosen = np.logical_and(chosen, idx)
915 if np.sum(chosen) > 1:
916 raise Exception(
917 "Found more than 1 fiducial run?!")
918 elif np.sum(chosen) < 1:
919 raise Exception(
920 "Found less than 1 fiducial run?!")
922 reference_run = self.MEANS[chosen, :]
924 # get the simulations for building the emulator
925 chosen = [True] * self.META.shape[0]
926 for idp, param in enumerate(self.ctx['parameters']):
927 idx = np.isclose(
928 self.META[param], self.ctx['parameter_fiducials'][idp])
929 chosen = np.logical_and(chosen, idx)
930 for idp, param in enumerate(self.ctx['nuisances']):
931 if param == nuis:
932 continue
933 idx = np.isclose(
934 self.META[param],
935 self.ctx['nuisance_fiducials'][idp])
936 chosen = np.logical_and(chosen, idx)
938 if selector is not None:
939 chosen = np.logical_and(chosen, selector)
940 LOGGER.debug(
941 f"Using {np.sum(chosen)} inputs to build scaling relation")
942 runs_ = self.MEANS[chosen, :]
944 # transform data vectors (only apply filter and normalization)
945 runs = np.zeros((runs_.shape[0], np.sum(self.filter)))
946 for irun, vec in enumerate(runs_):
947 vec = vec.reshape(1, -1)
948 if self.output_normalizer is not None:
949 vec = self.output_normalizer(vec)
950 vec = vec[:, self.filter]
951 runs[irun] = vec
953 if self.output_normalizer is not None:
954 reference_run = self.output_normalizer(reference_run)
955 reference_run = reference_run[:, self.filter]
957 meta = self.META[nuis][chosen]
958 ratios = (runs - reference_run) / reference_run
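# scaling-relation model per data bin:
# (D(nu) - D_fid) / D_fid ~ c_1 * nu + ... + c_degree * nu**degree
# (no constant term, so the relation passes through the fiducial point)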
960 if fitting[1:] != 'd-poly':
961 raise ValueError(f"Variable fitting cannot be {fitting}")
963 # fit polynomial for each data bin using WLS
964 for bin in range(ratios.shape[1]):
965 degree = int(fitting[0])
967 meta = meta.flatten()
968 # no constant term -> force polynomial to go through (0,0)
969 terms = [meta**ii for ii in range(1, degree + 1)]
970 A = np.array(terms).T
972 B = ratios[:, bin].flatten()
973 W = np.ones_like(B)
975 # WLS
976 Xw = A * np.sqrt(W)[:, None]
977 yw = B * np.sqrt(W)
978 yw[np.isnan(B)] = 0.0
979 yw[np.isinf(B)] = 0.0
980 coeff, r, rank, s = scipy.linalg.lstsq(Xw, yw)
981 self.emulators['fiducial'][nuis].append(coeff)
982 self.emulators['fiducial'][nuis] = np.array(
983 self.emulators['fiducial'][nuis])
985 self.fitting = fitting
986 LOGGER.info("Built scaling relations")
988 def get_neg_loglikelihood(
989 self, params, measurement, mean=None, debias=False,
990 evaluate_prior=True, prior_mode='emcee'):
991 """
992 Returns negative loglikelihood given a measurement data vector at the
993 location in parameter space indicated by params.
994 Can also pass a mean data vector directly to test.
995 :param params: List/Dictionary of the parameters at which to calculate
996 the neg loglikelihood
997 :param measurement: The measurement data vector to compare to
998 :param mean: Test data vector instead of calculating the expected
999 datavector from the emulator.
1000 :param debias: If True use likelihood considering estimated precision
1001 matrix and data vectors
1002 :param evaluate_prior: If True adds the prior to the likelihood
1003 :param prior_mode: Either emcee or polychord
1004 (polychord requires a different kind of prior)
1005 :return: Negative loglikelihood
1006 """
1008 params = self._convert_dict_list(params, t='list')
1010 if len(params) > len(self.ctx['prior_configuration']):
1011 raise ValueError(
1012 "You passed more parameters than there are "
1013 "entries in your prior!")
1015 if evaluate_prior:
1016 # Get prior
1017 prior = self._get_prior(params, prior_mode)
1018 if np.isnan(prior):
1019 LOGGER.debug("Outside of prior range -> Returning -inf")
1020 return -np.inf
1022 # prediction of datavector at the given parameters
1023 if mean is None:
1024 mean = self._get_interp_data_vector(params)
1025 if np.any(np.isnan(mean)):
1026 LOGGER.debug(
1027 "Got nan values in data vector prediction "
1028 "-> Returning -inf")
1029 return -np.inf
1031 if not hasattr(self, 'precision_matrix'):
1032 raise Exception(
1033 "Cannot calculate loglikelihood without precision "
1034 "matrix being built")
1036 # measurement = self._transform_datavector(measurement)
1038 # Calculate the likelihood
1039 Q = self._get_likelihood(
1040 measurement.reshape(1, -1), mean, self.precision_matrix,
1041 debias=debias)
1043 # get neg loglike
1044 log_like = -0.5 * Q
1045 if evaluate_prior:
1046 LOGGER.debug("Adding prior to likelihood")
1047 log_like -= 0.5 * prior
1049 if log_like > 0.0:
1050 return -np.inf
1051 if np.isnan(log_like):
1052 return -np.inf
1053 if not np.isfinite(log_like):
1054 return -np.inf
1056 LOGGER.debug(f"Evaluated neg_loglikelihood to {log_like}")
1057 return log_like
1059 # HELPER FUNCTIONS
1060 def _build_PCA_basis(self, truncate=0, inc_var=99.999, partial_PCA=False):
1061 LOGGER.info("Building PCA basis")
1063 PCA_means = self.transform_datavectors(self.MEANS)
1064 self.pca = {}
1065 if truncate > 0:
1066 if partial_PCA:
1067 stats = self.ctx['statistic'].split('-')
1068 counter = 0
1069 for stat in stats:
1070 pca = PCA(n_components=truncate, whiten=True)
1071 pca.fit(PCA_means[
1072 :, counter:counter + self.datavectorlengths[stat]])
1073 self.pca[stat] = pca
1074 LOGGER.info(
1075 f'Keeping {len(pca.explained_variance_)} PCA '
1076 f'components for statistic {stat}')
1077 counter += self.datavectorlengths[stat]
1078 else:
1079 pca = PCA(n_components=truncate, whiten=True)
1080 pca.fit(PCA_means)
1081 LOGGER.info(
1082 f'Keeping {len(pca.explained_variance_)} PCA components')
1083 self.pca[self.ctx['statistic']] = pca
1084 else:
1085 if partial_PCA:
1086 stats = self.ctx['statistic'].split('-')
1087 counter = 0
1088 for stat in stats:
1089 pca = PCA(n_components=(inc_var / 100.),
1090 svd_solver='full', whiten=True)
1091 pca.fit(PCA_means[
1092 :, counter:counter + self.datavectorlengths[stat]])
1093 self.pca[stat] = pca
1094 LOGGER.info(
1095 f'Keeping {len(pca.explained_variance_)} PCA '
1096 f'components for statistic {stat}')
1097 counter += self.datavectorlengths[stat]
1098 else:
1099 pca = PCA(n_components=(inc_var / 100.),
1100 svd_solver='full', whiten=True)
1101 pca.fit(PCA_means)
1102 LOGGER.info(
1103 f'Keeping {len(pca.explained_variance_)} PCA components')
1104 self.pca[self.ctx['statistic']] = pca
1106 def _scale_peak_func(self, params):
1107 """
1108 Get scale factors from the scaling relations by which the
1109 data vector needs to be multiplied
1110 in order to emulate a certain nuisance configuration
1111 """
1113 scale = np.ones(np.sum(self.filter))
1114 fiducial_keys = np.arange(len(self.ctx['nuisances']))
1115 degree = int(self.fitting[0])
1116 scale = _get_scaling(
1117 scale, params,
1118 fiducial_keys_pos=fiducial_keys,
1119 nuisances=np.asarray(self.ctx['nuisances']),
1120 emulators=self.emulators,
1121 degree=degree,
1122 p_index=self.uni_p,
1123 p_inv=self.uni_p_inv)
1124 return scale
1126 def _get_likelihood(self, meas, mean, incov, debias=False):
1127 """
1128 Returns likelihood using either standard Gaussian likelihood or using
1129 debiasing due to estimated cov matrix and predicted data vectors
1130 (debias=True)
1131 """
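# chi^2 part of a Gaussian likelihood: Q = (mu - d)^T C^-1 (mu - d)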
1132 diff = mean - meas
1133 Q = np.dot(diff, np.dot(incov, diff.reshape(-1, 1)))[0]
1134 if debias:
1135 """
1136 According to:
1137 Jeffrey, Niall, and Filipe B. Abdalla. Parameter inference and
1138 model comparison using theoretical predictions from noisy
1139 simulations. Monthly Notices of the Royal Astronomical
1140 Society 490.4 (2019): 5749-5756.
1141 """
1143 LOGGER.debug("Debiasing likelihood")
1144 Q = 1. + Q * self.N_mean / ((self.N_mean + 1.) * (self.N_cov - 1.))
1145 Q *= self.N_cov
1147 return Q
1149 def _create_filter(self, chosen_bins=None,
1150 reduce_to_selected_scales=False):
1151 """
1152 Creates a filter that cuts out the required data vector elements
1153 and an array indicating the tomographic bins for the emulator
1154 as well as some other arrays used by the interpolator.
1155 """
1157 def get_tomo_bin_num(stat, ctx):
1158 if 'Cross' in stat:
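# number of bin pairs with repetition: binom(n + 1, 2) = n * (n + 1) / 2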
1159 tomo_bins = int(scipy.special.binom(
1160 ctx['n_tomo_bins'] + 2 - 1,
1161 ctx['n_tomo_bins'] - 1))
1162 else:
1163 tomo_bins = self.ctx['n_tomo_bins']
1164 return tomo_bins
1166 filter = np.zeros(0, dtype=bool)
1167 bin_filter = np.zeros(0, dtype=str)
1169 stats = self.ctx['statistic'].split('-')
1170 self.datavectorlengths = {}
1172 for stat in stats:
1173 vec_length = 0
1174 plugin = utils.import_executable(stat, 'filter')
1175 # determine number of different tomographic bins
1176 tomo_bins = get_tomo_bin_num(stat, self.ctx)
1178 # build a filter to select the relevant data bins
1179 for bin in range(tomo_bins):
1180 if 'Cross' in stat:
1181 bin_val = self.ctx['cross_ordering'][bin]
1182 elif self.ctx['n_tomo_bins'] == 1:
1183 bin_val = '00x00'
1184 else:
1185 bin_val = '{}x{}'.format(
1186 str(bin + 1).zfill(2), str(bin + 1).zfill(2))
1187 # add to filter
1188 self.ctx['bin'] = bin
1189 f = plugin(self.ctx)
1190 if not reduce_to_selected_scales:
1191 f = np.ones(len(f), dtype=bool)
1192 # allow to ignore certain bins
1193 if chosen_bins is not None:
1194 if bin_val not in chosen_bins:
1195 f = np.full(len(f), False)
1196 filter = np.append(filter, f)
1197 vec_length += np.sum(f)
1198 self.datavectorlengths[stat] = int(vec_length)
1200 # build an array indicating which data bins belong to which
1201 # tomographic bin
1202 for bin in range(tomo_bins):
1203 if 'Cross' in stat:
1204 bin_val = self.ctx['cross_ordering'][bin]
1205 elif self.ctx['n_tomo_bins'] == 1:
1206 bin_val = '00x00'
1207 else:
1208 bin_val = '{}x{}'.format(
1209 str(bin + 1).zfill(2), str(bin + 1).zfill(2))
1210 if chosen_bins is not None:
1211 if bin_val not in chosen_bins:
1212 if reduce_to_selected_scales:
1213 continue
1215 # add to bin filter
1216 self.ctx['bin'] = bin
1217 f = plugin(self.ctx)
1218 if not reduce_to_selected_scales:
1219 f = np.ones(len(f), dtype=bool)
1221 bin_filter = np.append(
1222 bin_filter, [bin_val] * int(np.sum(f)))
1224 self.bin_array = bin_filter
1226 # get mapping for each data vector bin and nuisance to the correct
1227 # location in nuisance parameters
1228 p_index = np.zeros(
1229 (int(np.sum(filter)), len(self.ctx['nuisances']), 2), dtype=int)
1230 for s in range(int(np.sum(filter))):
1231 bin = self.bin_array[s]
1232 counter = len(self.ctx['parameters'])
1233 for ii, param in enumerate(self.ctx['nuisances']):
1234 if self.ctx['multi_bin'][ii]:
1235 # take multi_bin mode into account
1236 if int(bin[:2]) == 0:
1237 # non-tomographic
1238 idx1 = counter + int(bin[:2])
1239 idx2 = idx1
1240 counter += 1
1241 else:
1242 idx1 = counter + \
1243 np.where(self.unique_bins == int(bin[:2]))[0]
1244 idx2 = counter + \
1245 np.where(self.unique_bins == int(bin[-2:]))[0]
1246 counter += len(self.unique_bins)
1247 p_index[s, ii, 0] = idx1
1248 p_index[s, ii, 1] = idx2
1249 else:
1250 p_index[s, ii, 0] = counter
1251 p_index[s, ii, 1] = counter
1252 counter += 1
1254 # calculate the unique combinations
1255 self.uni_p = []
1256 self.uni_p_inv = []
1257 for ii, param in enumerate(self.ctx['nuisances']):
1258 uni_p_index, p_index_inv = np.unique(p_index[:, ii, :], axis=0,
1259 return_inverse=True)
1260 self.uni_p.append(uni_p_index)
1261 self.uni_p_inv.append(p_index_inv)
1262 self.uni_p = tuple(self.uni_p)
1263 self.uni_p_inv = np.asarray(self.uni_p_inv, dtype=int)
1265 return filter.astype(bool)
1267 def _get_loc(self, params, exclude=False, preselection=None):
1268 """
1269 Get a filter for data realisations where parameters are set to params
1270 """
1271 if preselection is None:
1272 chosen = [True] * self.META.shape[0]
1273 else:
1274 chosen = preselection
1275 for param in params.keys():
1276 idx = np.isclose(self.META[param], params[param])
1277 chosen = np.logical_and(chosen, idx)
1278 if exclude:
1279 chosen = np.logical_not(chosen)
1280 return chosen
1282 def _get_interp_data_vector(self, params):
1283 """
1284 Returns an interpolated data vector at parameters params
1285 """
1286 pars = self.original_parameter_fiducials[:]
1287 pars[self.valid_parameter_positions] = np.asarray(
1288 params)[self.valid_parameter_positions]
1289 # normalize
1290 if self.input_normalizer is not None:
1291 pars = self.input_normalizer(pars.reshape(1, -1))
1292 LOGGER.debug("Applied input normalizer")
1293 pars = np.atleast_2d(pars)
1295 if self.interpolation_mode == "linear":
1296 mean = self.interpolator(pars)
1297 mean = np.asarray(mean).flatten()
1298 elif self.interpolation_mode == "GPR":
1299 # get prediction from GPR interpolator
1300 mean = self.interpolator.predict(pars)
1301 mean = np.asarray(mean).flatten()
1302 elif self.interpolation_mode == "NN":
1303 mean = self.interpolator(
1304 tf.convert_to_tensor(pars), training=False)
1305 mean = np.asarray(mean).flatten()
1307 # scale prediction according to polynomial models
1308 if len(params) > len(pars):
1309 LOGGER.debug("Applying scaling relations")
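# the scaling relations act on the filtered data-vector space; if a PCA
# was built, rotate the prediction back, scale it, and re-project below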
1310 if hasattr(self, 'pca'):
1311 if len(list(self.pca.keys())) > 1:
1312 # partial pca
1313 stats = self.ctx['statistic'].split('-')
1314 new_vec = np.zeros(0)
1315 counter = 0
1316 for stat in stats:
1317 vec_partial = mean[
1318 counter:counter + self.pca[stat].n_components_]
1319 new_vec = np.append(
1320 new_vec, self.pca[stat].inverse_transform(
1321 vec_partial.reshape(1, -1)).flatten())
1322 counter += self.pca[stat].n_components_
1323 mean = new_vec
1324 else:
1325 mean = self.pca[self.ctx['statistic']].inverse_transform(
1326 mean.reshape(1, -1))
1328 scale_facs = self._scale_peak_func(params).reshape(1, -1)
1329 mean = mean * scale_facs
1331 if hasattr(self, 'pca'):
1332 if len(list(self.pca.keys())) > 1:
1333 # partial pca
1334 stats = self.ctx['statistic'].split('-')
1335 new_vec = np.zeros(0)
1336 counter = 0
1337 for stat in stats:
1338 vec_partial = mean[
1339 :, counter:counter+self.datavectorlengths[stat]]
1340 new_vec = np.append(
1341 new_vec,
1342 self.pca[stat].transform(
1343 vec_partial.reshape(1, -1)).flatten())
1344 counter += self.datavectorlengths[stat]
1345 mean = new_vec
1346 else:
1347 mean = self.pca[self.ctx['statistic']].transform(
1348 mean).reshape(1, -1)
1350 # moped compression
1351 if hasattr(self, 'MOPED_matrix'):
1352 LOGGER.debug("Converting to MOPED space")
1353 mean = np.matmul(
1354 self.MOPED_matrix, mean.reshape(-1, 1)).reshape(1, -1)
1355 return mean
1357 def _invert_cov_matrices(self, check_inversion=True,
1358 neglect_correlation=False):
1359 """
1360 Inverts covariance matrices
1361 :param neglect_correlation: Reduces the covariance matrix to its
1362 diagonal before inverting.
1363 """
1365 temps = []
1366 for ii in range(self.precision_matrix.shape[0]):
1367 # checking stability of inversion
1368 if neglect_correlation:
1369 inverse = np.linalg.solve(
1370 np.diag(
1371 np.diag(self.precision_matrix[ii, :, :])),
1372 np.eye(self.precision_matrix[ii, :, :].shape[0]))
1373 else:
1374 inverse = np.linalg.solve(
1375 self.precision_matrix[ii, :, :],
1376 np.eye(self.precision_matrix[ii, :, :].shape[0]))
1377 if check_inversion:
1378 id_check = np.dot(
1379 self.precision_matrix[ii, :, :], inverse)
1380 id = np.eye(id_check.shape[0])
1381 if not np.allclose(id_check, id):
1382 raise Exception(
1383 "Inversion of Fiducial Covariance matrix "
1384 "did not pass numerical stability test")
1385 else:
1386 LOGGER.debug("Successfully inverted Fiducial "
1387 "Covariance matrix")
1388 temps.append(inverse)
1389 self.precision_matrix = np.asarray(temps)
1390 if self.precision_matrix.shape[0] == 1:
1391 self.precision_matrix = self.precision_matrix[0]
1393 def _convert_dict_list(self, obj, t='list'):
1394 """
1395 Converts dictionary to list or the other way around
1396 """
1397 if isinstance(obj, dict):
1398 f = 'dict'
1399 elif (isinstance(obj, list)) | (isinstance(obj, np.ndarray)):
1400 f = 'list'
1401 else:
1402 raise Exception("{} is neither a list nor a dictionary".format(obj))
1404 if f == t:
1405 return obj
1406 elif (f == 'dict') & (t == 'list'):
1407 list_ = []
1408 for key in self.ctx['parameters']:
1409 if key in list(obj.keys()):
1410 list_.append(obj[key])
1411 for ii, key in enumerate(self.ctx['nuisances']):
1412 if key in list(obj.keys()):
1413 list_.append(obj[key])
1414 for bin in range(1, 1 + self.ctx['n_tomo_bins']):
1415 if '{}_{}'.format(key, bin) in list(obj.keys()):
1416 list_.append(obj['{}_{}'.format(key, bin)])
1417 return list_
1419 elif (f == 'list') & (t == 'dict'):
1420 if len(obj) == len(self.ctx['parameters']):
1421 LOGGER.warning(
1422 "The length of the object {} matches only the number of "
1423 "parameters. Assuming that only parameters and no "
1424 "nuisances are given.".format(obj))
1425 check = False
1426 elif len(obj) == \
1427 (len(self.ctx['parameters']) + len(self.ctx['nuisances'])):
1428 check = True
1429 else:
1430 raise Exception(
1431 "The list {} does not contain the right number of "
1432 "parameters. Either only parameters or parameters + "
1433 "nuisances".format(obj))
1435 dict_ = {}
1436 for ik, key in enumerate(self.ctx['parameters']):
1437 dict_[key] = obj[ik]
1438 if check:
1439 for ik, key in enumerate(self.ctx['nuisances']):
1440 dict_[key] = obj[
1441 len(self.ctx['parameters']) + ik]
1442 return dict_
1444 def _MOPED_compression(self, parameters, p0_dict, d_p0_dict):
1445 '''
1446 In order to compress a given data vector,
1447 you need to be able to compute the derivative
1448 of your data vector wrt all the parameters in your analysis
1449 (cosmological + nuisance).
1450 The output will be a matrix of length (N_params,len_uncompressed_DV).
1451 Then you just need to do:
1452 t_matrix # this the compression matrix
1453 compressed_cov =
1454 np.matmul(t_matrix,np.matmul(uncompressed_cov,t_matrix.T))
1455 compressed_DV = np.matmul(t_matrix,uncompressed_DV)
1456 Credits: Marco Gatti
1457 '''
1459 try:
1460 inv_cov = self.precision_matrix
1461 len_uncompressed_DV = inv_cov.shape[0]
1462 except AttributeError:
1463 raise Exception(
1464 "Cannot build MOPED compression because precision matrix "
1465 "was not built -> Set build_precision_matrix=True in "
1466 "readin_interpolation_data")
1467 if not hasattr(self, 'interpolator'):
1468 raise Exception(
1469 "Cannot build MOPED compression without emulator. "
1470 "Run build_emulator() first")
1472 # this initialises the compression matrix.
1473 transf_matrix = np.zeros((len(parameters), len_uncompressed_DV))
1475 for count, parameter in enumerate(parameters):
1476 LOGGER.debug(f"Building compression for parameter {parameter}")
1477 LOGGER.debug(f"Using fiducial value {p0_dict[parameter]} "
1478 f"and increment {d_p0_dict[parameter]}")
1479 p1_dict = copy.copy(p0_dict)
1480 # initialise some stuff.
1481 ddh = np.zeros((len_uncompressed_DV, 4))
1482 der = np.zeros(len_uncompressed_DV)
1483 # now I am doing the 5-stencil derivative wrt to one parameter
1484 # at a time.
1485 # define a new dictionary with only ONE parameter slightly varied.
1486 p1_dict[parameter] = p0_dict[parameter] - 2 * d_p0_dict[parameter]
1487 u = self._compute_theory(p1_dict)
1488 ddh[:, 0] = u
1490 p1_dict[parameter] = p0_dict[parameter] - d_p0_dict[parameter]
1491 u = self._compute_theory(p1_dict)
1492 ddh[:, 1] = u
1493 p1_dict[parameter] = p0_dict[parameter] + d_p0_dict[parameter]
1494 u = self._compute_theory(p1_dict)
1495 ddh[:, 2] = u
1497 p1_dict[parameter] = p0_dict[parameter] + 2 * d_p0_dict[parameter]
1498 u = self._compute_theory(p1_dict)
1499 ddh[:, 3] = u
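# five-point stencil:
# f'(x) ~ [f(x-2h) - 8 f(x-h) + 8 f(x+h) - f(x+2h)] / (12 h)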
1501 der = -(- ddh[:, 0] + 8 * ddh[:, 1] - 8 * ddh[:, 2]
1502 + ddh[:, 3]) / (12 * d_p0_dict[parameter])
1503 der = der.reshape(-1, 1)
1504 # the inv_cov is the approximate uncompressed inverse covariance.
1505 bc = np.matmul(inv_cov, der)
1506 norm = np.matmul(der.T, np.matmul(inv_cov, der))
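# Gram-Schmidt style orthogonalisation against the rows already built,
# so each compressed component adds independent information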
1507 if count > 0:
1508 for q in range(count):
1509 w = np.matmul(
1510 der.T, transf_matrix[q, :].reshape(-1, 1))
1511 w = w[0][0] * transf_matrix[q, :].reshape(-1, 1)
1512 bc = bc - w
1513 norm = norm - np.matmul(der.T, transf_matrix[q, :])**2.
1514 norm = np.abs(norm)
1515 norm = np.sqrt(norm)
1516 transf_matrix[count, :] = bc.T / norm[0][0]
1517 return transf_matrix
1519 def _compute_theory(self, p0_dict):
1520 list = self._convert_dict_list(p0_dict, 'list')
1521 mean = self._get_interp_data_vector(list)
1522 return mean.reshape(-1)
1524 def _transform_datavector(self, vec_in):
1525 """
1526 Applies all the available modifications to a raw data vector,
1527 meaning normalization, filtering, PCA and MOPED compression
1528 in this exact order.
1529 :param vec_in: A raw data vector that should be tranformed.
1530 :return: The transformed data vector
1531 """
1533 vec = vec_in[:].reshape(1, -1)
1534 if self.output_normalizer is not None:
1535 vec = self.output_normalizer(vec)
1536 LOGGER.debug("Applied output normalization")
1537 vec = vec[:, self.filter]
1538 if hasattr(self, 'pca'):
1539 if len(list(self.pca.keys())) > 1:
1540 # partial pca
1541 stats = self.ctx['statistic'].split('-')
1542 new_vec = np.zeros(0)
1543 counter = 0
1544 for stat in stats:
1545 vec_partial = vec[
1546 :, counter:counter+self.datavectorlengths[stat]]
1547 new_vec = np.append(
1548 new_vec,
1549 self.pca[stat].transform(
1550 vec_partial.reshape(1, -1)).flatten())
1551 counter += self.datavectorlengths[stat]
1552 vec = new_vec
1553 else:
1554 vec = self.pca[self.ctx['statistic']].transform(vec)
1555 LOGGER.debug("Applied PCA")
1556 if hasattr(self, 'MOPED_matrix'):
1557 vec = np.matmul(self.MOPED_matrix, vec.reshape(-1, 1))
1558 LOGGER.debug("Applied MOPED compression")
1559 return vec.reshape(1, -1)
1561 def _set_meta(self, meta, parameters=None, nuisances=None,
1562 normalize_input=False, normalize_output=False):
1563 """
1564 Set meta data table
1566 :param meta: Meta table to set
1567 """
1569 if parameters is None:
1570 parameters = copy.copy(self.ctx['parameters'])
1571 if nuisances is None:
1572 nuisances = copy.copy(self.ctx['nuisances'])
1574 if (self.input_normalizer is None) \
1575 & (normalize_input | normalize_output):
1576 # build meta normalizer using all realisations used for emulator
1578 if normalize_input:
1579 # only use those with fiducial nuisance
1580 idx_norm = np.ones(meta.shape[0], dtype=bool)
1581 for nuis, fid in zip(
1582 self.ctx['nuisances'], self.ctx['nuisance_fiducials']):
1583 idx_norm = np.logical_and(
1584 idx_norm, np.isclose(meta[nuis], fid))
1585 meta_norm = meta[idx_norm]
1586 meta_norm = pd.DataFrame(meta_norm)
1587 meta_norm = meta_norm.drop(columns=['tomo', 'NREALS'])
1588 for nuis in self.ctx['nuisances']:
1589 meta_norm = meta_norm.drop(columns=[nuis])
1590 self.input_normalizer = Normalizer()
1591 self.input_normalizer.adapt(np.array(meta_norm))
1592 LOGGER.debug("Built input normalizer")
1593 else:
1594 idx_norm = None
1596 # allow to only use a subspace of the original parameters/nuisances
1598 # parameters
1599 rejects = {}
1600 positions = []
1601 self.original_parameter_fiducials = np.asarray(
1602 self.ctx['parameter_fiducials'][:]).flatten()
1603 self.original_parameters = np.asarray(
1604 self.ctx['parameters'][:]).flatten()
1605 LOGGER.info(
1606 f"Using parameters {parameters} "
1607 f"out of {self.original_parameters}")
1609 orig = np.asarray(copy.copy(self.ctx['parameters']))
1610 self.ctx['parameters'] = []
1611 self.ctx['parameter_fiducials'] = []
1612 for p in parameters:
1613 if p in orig:
1614 pos = np.where(orig == p)[0][0]
1615 positions.append(pos)
1616 else:
1617 raise ValueError(
1618 f"Requested parameter {p} is not in parameters.")
1619 self.ctx['parameters'].append(p)
1620 self.ctx['parameter_fiducials'].append(
1621 self.original_parameter_fiducials[pos])
1622 reject_params = list(set(orig) - set(self.ctx['parameters']))
1623 for p in reject_params:
1624 rejects[p] = self.original_parameter_fiducials[
1625 np.where(orig == p)[0][0]]
1627 # store all valid parameter positions
1628 self.valid_parameter_positions = copy.deepcopy(
1629 np.asarray(positions, dtype=int).flatten())
1631 # nuisances
1632 self.original_nuisance_fiducials = np.asarray(
1633 self.ctx['nuisance_fiducials'][:]).flatten()
1634 self.original_nuisances = np.asarray(
1635 self.ctx['nuisances'][:]).flatten()
1636 LOGGER.info(
1637 f"Using nuisances {nuisances} "
1638 f"out of {self.original_nuisances}")
1640 orig_nuis = np.asarray(copy.copy(self.ctx['nuisances']))
1641 orig_bin = copy.copy(self.ctx['multi_bin'])
1642 self.ctx['nuisances'] = []
1643 self.ctx['nuisance_fiducials'] = []
1644 self.ctx['multi_bin'] = []
1645 counter = 0
1646 for p in nuisances:
1647 if p in orig_nuis:
1648 pos = np.where(orig_nuis == p)[0][0]
1649 if orig_bin[pos]:
1650 positions += [
1651 pos + len(orig) + x for x in range(
1652 len(self.unique_bins))]
1653 counter += len(self.unique_bins)
1654 else:
1655 positions.append(counter + len(orig))
1656 counter += 1
1657 else:
1658 raise ValueError(
1659 f"Requested nuisance {p} is not in nuisances.")
1660 self.ctx['nuisances'].append(p)
1661 self.ctx['nuisance_fiducials'].append(
1662 self.original_nuisance_fiducials[pos])
1663 self.ctx['multi_bin'].append(orig_bin[pos])
1664 reject_params = list(set(orig_nuis) - set(self.ctx['nuisances']))
1665 for p in reject_params:
1666 rejects[p] = self.original_nuisance_fiducials[
1667 np.where(orig_nuis == p)[0][0]]
1669 # adjust prior
1670 orig_prior = copy.deepcopy(self.ctx['prior_configuration'])
1671 self.ctx['prior_configuration'] = []
1672 for pos in positions:
1673 self.ctx['prior_configuration'].append(orig_prior[pos])
1674 LOGGER.info(
1675 f"Set prior configuration to {self.ctx['prior_configuration']}")
1677 new_dtype = np.dtype({'names': tuple(['tomo', 'NREALS']),
1678 'formats': tuple(['i4', 'i4'])})
1680 # add all parameters and nuisances specified in config
1681 for ii, param in enumerate(self.ctx['parameters']):
1682 new_dtype = np.dtype(new_dtype.descr + [(param, 'f8')])
1683 for ii, nuis in enumerate(self.ctx['nuisances']):
1684 new_dtype = np.dtype(new_dtype.descr + [(nuis, 'f8')])
1686 # create new meta data with only specified parameters
1687 new_meta = np.zeros(meta.shape, dtype=new_dtype)
1688 for par in ['tomo', 'NREALS']:
1689 new_meta[par] = meta[par]
1690 for ii, par in enumerate(self.ctx['parameters']):
1691 try:
1692 new_meta[par] = meta[par]
1693 except ValueError:
1694 new_meta[par] = [self.ctx['parameter_fiducials'][ii]] \
1695 * meta.shape[0]
1696 for ii, nuis in enumerate(self.ctx['nuisances']):
1697 try:
1698 new_meta[nuis] = meta[nuis]
1699 except ValueError:
1700 new_meta[nuis] = [self.ctx['nuisance_fiducials'][ii]] \
1701 * meta.shape[0]
1703 # reject simulations where rejected parameters
1704 # are not set to fiducial values
1705 select = np.ones(meta.shape[0], dtype=bool)
1706 for r in rejects.items():
1707 select = np.logical_and(select, np.isclose(meta[r[0]], r[1]))
1709 self.META = new_meta[select]
1710 self.N_mean = np.mean(self.META['NREALS'])
1711 return select, idx_norm
1713 def _set_means(self, MEANS, select, means_is_path):
1714 """
1715 Set mean data vectors
1717 :param MEANS: Means to set
1718 """
1720 if means_is_path:
1721 if select is not None:
1722 MEANS = MEANS[select, :]
1723 else:
1724 MEANS = MEANS[:, :]
1725 else:
1726 MEANS = MEANS[:, :]
1728 self.MEANS = MEANS
1729 if self.MEANS.shape[0] != self.META.shape[0]:
1730 raise Exception(
1731 f"meta data and mean data vectors do not match! "
1732 f"Received {self.META.shape[0]} meta data entries "
1733 f"and {self.MEANS.shape[0]} data vectors")
1735 def _get_prior(self, params, prior_mode='emcee'):
1736 """
1737 Evaluates the prior.
1738 :param params: List of parameters
1739 (order should be same as in parameters and nuisances)
1740 :param prior_mode: Either emcee or polychord
1741 (polychord requires a different kind of prior)
1742 """
1743 if prior_mode == 'polychord':
1744 if not poly_enable:
1745 raise Exception(
1746 "Cannot use prior_mode=polychord "
1747 "without polychord installation!")
1748 prior = []
1749 for ii in range(len(params)):
1750 prior_config = self.ctx['prior_configuration'][ii]
1751 if prior_config[0] == 'flat':
1752 prior.append(
1753 UniformPrior(
1754 prior_config[1],
1755 prior_config[2])(params[ii]))
1756 elif prior_config[0] == 'normal':
1757 prior.append(
1758 GaussianPrior(
1759 prior_config[1],
1760 prior_config[2])(params[ii]))
1761 elif prior_mode == 'emcee':
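# emcee mode: flat priors contribute 0 inside their bounds and NaN
# outside (mapped to -inf upstream); Gaussian priors add
# ((p - mu) / sigma)**2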
1762 prior = 0.0
1763 for ii in range(0, len(params)):
1764 prior_config = self.ctx['prior_configuration'][ii]
1765 if prior_config[0] == 'flat':
1766 if params[ii] < prior_config[1]:
1767 prior = np.nan
1768 break
1769 if params[ii] > prior_config[2]:
1770 prior = np.nan
1771 break
1772 elif prior_config[0] == 'normal':
1773 prior += (
1774 (params[ii] - prior_config[1]) / prior_config[2])**2.
1775 else:
1776 raise ValueError(f"Option {prior_mode} for prior mode not valid")
1777 LOGGER.debug(f"Evaluated prior to {prior}")
1778 return prior
1780 def build_precision_matrix(self, check_inversion=True,
1781 neglect_correlation=True):
1782 """
1783 Uses the fiducial data vector realisations to build the precision
1784 matrix.
1785 :param check_inversion: If True performs some stability checks for
1786 the inversion.
1787 :param neglect_correlation: If True all off-diagonal elements in
1788 the cov matrix are set to 0.
1789 """
1791 if not hasattr(self, 'FIDUCIAL_DATA'):
1792 raise Exception(
1793 "Cannot build precision matrix without fiducial data vectors!")
1794 fiducials = self.transform_datavectors(self.FIDUCIAL_DATA)
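# sample covariance of the transformed fiducial realisations; it is
# inverted below to obtain the precision matrix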
1796 cov = np.cov(fiducials, rowvar=False)
1798 # only setting a single central covariance matrix
1799 self.precision_matrix = cov.reshape(1, cov.shape[0], cov.shape[1])
1800 self._invert_cov_matrices(check_inversion=check_inversion,
1801 neglect_correlation=neglect_correlation)
1802 self.N_cov = self.FIDUCIAL_DATA.shape[0]
1805# @nb.jit(nopython=True)
1806def _get_param_array(params, p_index, degree, fiducial_keys_pos):
1807 ps = []
1808 for ii in fiducial_keys_pos:
1809 p_ = p_index[ii]
1810 ps_ = []
1811 for s in range(len(p_)):
1812 p = (params[p_[s, 0]]
1813 + params[p_[s, 1]]) / 2.
1814 ps_.append(p)
1815 ps.append(ps_)
1817 # for polynomial precalculate all the terms
1818 p_terms = []
1819 for ii in range(len(fiducial_keys_pos)):
1820 ps_ = ps[ii]
1821 terms_ = []
1822 for s in range(len(ps_)):
1823 p = ps_[s]
1824 terms = np.array(
1825 [p**xx for xx in range(1, degree + 1)])
1826 terms_.append(terms)
1827 p_terms.append(terms_)
1828 return ps, p_terms
1831# @nb.jit(nopython=True)
1832def _get_scale(length, p_terms, emulators, p_inv):
1833 # loop over each data bin
1834 scale = np.zeros(length)
1835 for s in range(length):
1836 terms = p_terms[p_inv[s]]
1837 to_add = np.dot(emulators[s], terms)
1838 scale[s] += to_add
1839 return scale
1842def _get_scaling(scale, params, fiducial_keys_pos, nuisances,
1843 emulators, degree, p_index, p_inv):
1844 ps, p_terms = _get_param_array(
1845 np.array(params), p_index, degree, fiducial_keys_pos)
1846 for ii, param in enumerate(nuisances):
1847 scale += _get_scale(
1848 scale.size, np.array(p_terms[ii]),
1849 emulators['fiducial'][param], p_inv[ii])
1850 return scale
1853class Normalizer:
1854 # selfmade whitener
1855 def __init__(self):
1856 self.variance = []
1857 self.mean = []
1858 self.is_adapted = False
1860 def adapt(self, input):
1861 # assert 2d array
1862 assert len(input.shape) == 2
1863 self.variance = np.var(input, axis=0)
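# guard against zero-variance bins: 1/variance would be inf there,
# so reset those variances to 1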
1864 self.variance[np.isinf(1./self.variance)] = 1.0
1865 self.mean = np.mean(input, axis=0)
1866 self.is_adapted = True
1868 def __call__(self, input):
1869 if len(input.shape) == 1:
1870 input = input.reshape(1, -1)
1871 assert len(input.shape) == 2
1872 assert input.shape[1] == self.variance.size
1873 return np.divide(input - self.mean, np.sqrt(self.variance))