# Source code for cosmoHammer.CosmoHammerSampler

from __future__ import print_function, division, absolute_import, unicode_literals

import emcee
import numpy as np
import logging
import time

import cosmoHammer
import cosmoHammer.Constants as c

from cosmoHammer import getLogger
from cosmoHammer.util import SampleFileUtil
from cosmoHammer.util import SampleBallPositionGenerator
from cosmoHammer.util.IterationStopCriteriaStrategy import IterationStopCriteriaStrategy



class CosmoHammerSampler(object):
    """
    A complete sampler implementation taking care of correct setup, chain burn in and sampling.

    :param params: the parameter of the priors. Indexed as a 2D array: column 0 is read as the
        parameter values and column 3 as the parameter widths
    :param likelihoodComputationChain: the callable computation chain
    :param filePrefix: the prefix for the log and output files
    :param walkersRatio: the ratio of walkers and the count of sampled parameters
    :param burninIterations: number of iteration for burn in
    :param sampleIterations: number of iteration to sample
    :param stopCriteriaStrategy: the strategy to stop the sampling.
        Default is None and then IterationStopCriteriaStrategy is used
    :param initPositionGenerator: the generator for the init walker position.
        Default is None and then SampleBallPositionGenerator is used
    :param storageUtil: util used to store the results.
        Default is None and then SampleFileUtil is used
    :param threadCount: The count of threads to be used for the computation. Default is 1
    :param reuseBurnin: Flag if the burn in should be reused.
        If true the values will be read from the file System. Default is False
    :param logLevel: level applied to the logger and to its file handler. Default is logging.INFO
    :param pool: forwarded as the `pool` keyword to the emcee sampler. Default is None
    """

    def __init__(self, params, likelihoodComputationChain, filePrefix, walkersRatio, burninIterations,
                 sampleIterations, stopCriteriaStrategy=None, initPositionGenerator=None,
                 storageUtil=None, threadCount=1, reuseBurnin=False, logLevel=logging.INFO, pool=None):
        """
        CosmoHammer sampler implementation
        """
        # Validate before any state is derived from the arguments.
        assert likelihoodComputationChain is not None, "The sampler needs a chain"
        assert sampleIterations > 0, "CosmoHammer needs to sample for at least one iterations"

        self.params = params
        self.likelihoodComputationChain = likelihoodComputationChain
        self.walkersRatio = walkersRatio
        self.reuseBurnin = reuseBurnin
        self.filePrefix = filePrefix
        self.threadCount = threadCount
        self.paramCount = len(self.paramValues)
        self.nwalkers = self.paramCount * walkersRatio
        self.burninIterations = burninIterations
        self.sampleIterations = sampleIterations

        # Hand the parameters to the chain unless it already carries its own.
        if not hasattr(self.likelihoodComputationChain, "params"):
            self.likelihoodComputationChain.params = params

        # setting up the logging
        self._configureLogging(filePrefix + c.LOG_FILE_SUFFIX, logLevel)

        if self.isMaster():
            self.log("Using CosmoHammer " + str(cosmoHammer.__version__))

        # The sampler object
        self._sampler = self.createEmceeSampler(likelihoodComputationChain, pool=pool)

        if storageUtil is None:
            storageUtil = self.createSampleFileUtil()
        self.storageUtil = storageUtil

        # The strategies get a reference to this sampler via setup() before they are stored.
        if stopCriteriaStrategy is None:
            stopCriteriaStrategy = self.createStopCriteriaStrategy()
        stopCriteriaStrategy.setup(self)
        self.stopCriteriaStrategy = stopCriteriaStrategy

        if initPositionGenerator is None:
            initPositionGenerator = self.createInitPositionGenerator()
        initPositionGenerator.setup(self)
        self.initPositionGenerator = initPositionGenerator

    def _configureLogging(self, filename, logLevel):
        """
        Replaces the handlers of the cosmoHammer logger with a file and a console handler

        :param filename: path of the log file; it is opened in "w" mode
        :param logLevel: level set on the logger and on the file handler
        """
        logger = getLogger()
        logger.setLevel(logLevel)

        fh = logging.FileHandler(filename, "w")
        fh.setLevel(logLevel)

        # the console handler only reports errors
        ch = logging.StreamHandler()
        ch.setLevel(logging.ERROR)

        # create formatter and add it to the handlers
        formatter = logging.Formatter('%(asctime)s %(levelname)s:%(message)s')
        fh.setFormatter(formatter)
        ch.setFormatter(formatter)

        # drop the handlers that are already attached; iterate over a copy
        # because the handler list is modified inside the loop
        for handler in logger.handlers[:]:
            try:
                handler.close()
            except AttributeError:
                pass
            logger.removeHandler(handler)

        # add the handlers to the logger
        logger.addHandler(fh)
        logger.addHandler(ch)

    def createStopCriteriaStrategy(self):
        """
        Returns a new instance of a stop criteria strategy
        """
        return IterationStopCriteriaStrategy()

    def createSampleFileUtil(self):
        """
        Returns a new instance of a File Util
        """
        return SampleFileUtil(self.filePrefix, reuseBurnin=self.reuseBurnin)

    def createInitPositionGenerator(self):
        """
        Returns a new instance of a Init Position Generator
        """
        return SampleBallPositionGenerator()

    @property
    def paramValues(self):
        """
        Column 0 of `params`
        """
        return self.params[:, 0]

    @property
    def paramWidths(self):
        """
        Column 3 of `params`
        """
        return self.params[:, 3]

    def startSampling(self):
        """
        Launches the sampling

        Runs the burn in (or loads a stored one, or skips it if `burninIterations` is 0),
        then samples. The pool of the emcee sampler and the storage util are closed
        afterwards, also if the sampling fails.
        """
        try:
            if self.isMaster():
                self.log(self.__str__())

            if self.burninIterations > 0:
                if self.reuseBurnin:
                    pos, prob, rstate = self.loadBurnin()
                    # a reloaded burn in has no blobs: one empty entry per loaded position
                    datas = [None] * len(pos)
                else:
                    pos, prob, rstate, datas = self.startSampleBurnin()
            else:
                pos = self.createInitPos()
                prob = None
                rstate = None
                datas = None

            # Starting from the final position in the burn-in chain
            self.log("start sampling after burn in")
            start = time.time()
            self.sample(pos, prob, rstate, datas)
            end = time.time()
            self.log("sampling done! Took: " + str(round(end - start, 4)) + "s")

            # Print out the mean acceptance fraction. In general, acceptance_fraction
            # has an entry for each walker
            self.log("Mean acceptance fraction:" + str(round(np.mean(self._sampler.acceptance_fraction), 4)))
        finally:
            # Best-effort cleanup: objects without a close() method are skipped.
            if self._sampler.pool is not None:
                try:
                    self._sampler.pool.close()
                except AttributeError:
                    pass

            # The storage is closed independently of whether a pool is in use.
            try:
                self.storageUtil.close()
            except AttributeError:
                pass

    def loadBurnin(self):
        """
        Loads the burn in from the file system

        :return: the last `nwalkers` stored positions, the last `nwalkers` stored
            probabilities and the stored random state
        """
        self.log("reusing previous burn in")

        pos = self.storageUtil.importFromFile(self.filePrefix + c.BURNIN_SUFFIX)[-self.nwalkers:]
        prob = self.storageUtil.importFromFile(self.filePrefix + c.BURNIN_PROB_SUFFIX)[-self.nwalkers:]
        rstate = self.storageUtil.importRandomState(self.filePrefix + c.BURNIN_STATE_SUFFIX)

        self.log("loading done")
        return pos, prob, rstate

    def startSampleBurnin(self):
        """
        Runs the sampler for the burn in

        The emcee sampler is reset afterwards.

        :return: position, probability, random state and blobs of the last burn in iteration
        """
        self.log("start burn in")
        start = time.time()
        p0 = self.createInitPos()
        pos, prob, rstate, data = self.sampleBurnin(p0)
        end = time.time()
        self.log("burn in sampling done! Took: " + str(round(end - start, 4)) + "s")
        self.log("Mean acceptance fraction for burn in:" + str(round(np.mean(self._sampler.acceptance_fraction), 4)))

        self.resetSampler()

        return pos, prob, rstate, data

    def resetSampler(self):
        """
        Resets the emcee sampler in the master node
        """
        if self.isMaster():
            self.log("Reseting emcee sampler")
            # Reset the chain to remove the burn-in samples.
            self._sampler.reset()

    def sampleBurnin(self, p0):
        """
        Run the emcee sampler for the burnin to create walker which are independent from their
        starting position

        The returned values are bound by the loop, so at least one iteration has to be run.

        :param p0: the initial walker positions
        :return: position, probability, random state and blobs of the last iteration
        """
        counter = 1
        for pos, prob, rstate, datas in self._sampler.sample(p0, iterations=self.burninIterations):
            if self.isMaster():
                self.storageUtil.persistBurninValues(pos, prob, datas)
                if counter % 10 == 0:
                    self.log("Iteration finished:" + str(counter))

            counter = counter + 1

        if self.isMaster():
            self.log("storing random state")
            self.storageUtil.storeRandomState(self.filePrefix + c.BURNIN_STATE_SUFFIX, rstate)

        return pos, prob, rstate, datas

    def sample(self, burninPos, burninProb=None, burninRstate=None, datas=None):
        """
        Starts the sampling process

        Every iteration is persisted on the master node. The loop ends after
        `sampleIterations` iterations or as soon as the stop criteria strategy reports
        that it has finished.

        :param burninPos: the walker positions to start from
        :param burninProb: passed to the emcee sampler as `lnprob0`
        :param burninRstate: passed to the emcee sampler as `rstate0`
        :param datas: passed to the emcee sampler as `blobs0`
        """
        counter = 1
        for pos, prob, _, datas in self._sampler.sample(burninPos,
                                                        lnprob0=burninProb,
                                                        rstate0=burninRstate,
                                                        blobs0=datas,
                                                        iterations=self.sampleIterations):
            if self.isMaster():
                self.log("Iteration done. Persisting", logging.DEBUG)
                self.storageUtil.persistSamplingValues(pos, prob, datas)

                if self.stopCriteriaStrategy.hasFinished():
                    break

                if counter % 10 == 0:
                    self.log("Iteration finished:" + str(counter))

            counter = counter + 1

    def isMaster(self):
        """
        Returns True. Can be overridden for multitasking i.e. with MPI
        """
        return True

    def log(self, message, level=logging.INFO):
        """
        Logs a message to the logfile

        :param message: the message to log
        :param level: the logging level of the message. Default is logging.INFO
        """
        getLogger().log(level, message)

    def createEmceeSampler(self, callable, **kwargs):
        """
        Factory method to create the emcee sampler

        :param callable: the function passed to the emcee sampler
        :param kwargs: further keyword arguments forwarded to emcee.EnsembleSampler
        """
        if self.isMaster():
            self.log("Using emcee " + str(emcee.__version__))
        return emcee.EnsembleSampler(self.nwalkers,
                                     self.paramCount,
                                     callable,
                                     threads=self.threadCount,
                                     **kwargs)

    def createInitPos(self):
        """
        Factory method to create initial positions
        """
        return self.initPositionGenerator.generate()

    def getChain(self):
        """
        Returns the sample chain
        """
        return self._sampler.chain

    def __str__(self, *args, **kwargs):
        """
        Returns the string representation of the sampler config
        """
        desc = "Sampler: " + str(type(self)) + "\n" \
               "configuration: \n" \
               " Params: " + str(self.paramValues) + "\n" \
               " Burnin iterations: " + str(self.burninIterations) + "\n" \
               " Samples iterations: " + str(self.sampleIterations) + "\n" \
               " Walkers ratio: " + str(self.walkersRatio) + "\n" \
               " Reusing burn in: " + str(self.reuseBurnin) + "\n" \
               " init pos generator: " + str(self.initPositionGenerator) + "\n" \
               " stop criteria: " + str(self.stopCriteriaStrategy) + "\n" \
               " storage util: " + str(self.storageUtil) + "\n" \
               "likelihoodComputationChain: \n" + str(self.likelihoodComputationChain) \
               + "\n"

        return desc