Usage¶

Persist and restart sampler¶

The following example

persists the state of the sampler to a file

continues sampling from the persisted sampler

demonstrates how to write output of the lnprob function to stdout or a file

shows how to set a seed

Below we use persist_final(), other options are persist_every_n_iterations() and persist_on_error().

from __future__ import print_function

import numpy as np

from uhammer import Parameters, continue_sampling, persist_final, sample

sigma = .5


def gen_data():
    a0 = .5
    b0 = .5
    c0 = 1

    x = np.linspace(-2, 2, 100)
    y_measured = a0 + b0 * x + c0 * x ** 2 + sigma * np.random.randn(*x.shape)
    return x, y_measured


p = Parameters()
p.add("a", (0, 1))
p.add("b", (0, 1))
p.add("c", (0, 2))


counter = 0


def lnprob(p, x, y_measured):
    global counter
    if counter < 5:
        print(p)
    counter += 1

    y = p.a + p.b * x + p.c * x ** 2
    diff = (y - y_measured) / sigma
    return -np.dot(diff.T, diff) / 2


n_walkers_per_param = 20


# we save the final state of the sampler to 'sampler.pkl' and write the output
# when evaluationg the 'lnprob' function to stdout. We further set a seed:
samples, lnprobs = sample(
    lnprob,
    p,
    args=gen_data(),
    n_walkers_per_param=n_walkers_per_param,
    n_samples=5000,
    show_progress=False,
    show_output=True,
    persist=persist_final("sampler.pkl"),
    seed=42,
)

print()
print("continue sampling")

counter = 0

# we continue sampling from the persisted sampler and write output to 'output.txt':
samples, lnprobs = continue_sampling(
    "sampler.pkl",
    n_samples=3000,
    show_progress=True,
    show_output=False,
    output_prefix="output",
)

$ python examples/sample_line_fit_extended.py
uhammer: perform 84 steps of emcee sampler
Parameters(a=3.745401e-01, b=9.507143e-01, c=1.463988e+00)
Parameters(a=5.986585e-01, b=1.560186e-01, c=3.119890e-01)
Parameters(a=5.808361e-02, b=8.661761e-01, c=1.202230e+00)
Parameters(a=7.080726e-01, b=2.058449e-02, c=1.939820e+00)
Parameters(a=8.324426e-01, b=2.123391e-01, c=3.636499e-01)
uhammer: persisted sampler after iteration 84

continue sampling
uhammer: perform 50 steps of emcee sampler
✗ passed: 00:00:00.6 left: 00:00:00.0 - [∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣]

$ ls -l output_run_0000.txt
-rw-r--r--  1 uweschmitt  staff  295 May 22 15:48 output_run_0000.txt

$ head -2 output_run_0000.txt
Parameters(a=0.4591726382942836, b=0.5968783255152834, c=0.9867551523205441)
Parameters(a=0.34492701778793583, b=0.5968000537205522, c=1.0672690581906268)

The sampler function returns computed samples as a numpy array of shape (n_samples, len(p) arranged as follows:

samples
p-dimensional sample from walker 0
p-dimensional sample from walker 1
…
p-dimensional sample from walker n - 1
p-dimensional sample from walker 0
p-dimensional sample from walker 1
…

Using parallel mode¶

Setting the argument parallel=True enables parallel mode. uhammer detects if your script runs with mpirun or not. Without MPI uhammer spawns workers on all available cores.

On euler this means you can either start your script using MPI:

$ bsub -n 200 mpirun python examples/sample_line_fit_parallel.py

or using cores on one compute node:

$ bsub -n 32 -R fullnode python examples/sample_line_fit_parallel.py

In case you allocate more cores than available, or if number of walkers is not a multiple of number of workers, uhammer will show you some warnings.

from __future__ import print_function

import time

import numpy as np
from mpi4py import MPI

from uhammer import Parameters, persist_every_n_iterations, sample

sigma = .5


def gen_data():
    a0 = .5
    b0 = .5
    c0 = 1

    x = np.linspace(-2, 2, 100)
    y_measured = a0 + b0 * x + c0 * x ** 2 + sigma * np.random.randn(*x.shape)
    return x, y_measured


p = Parameters()
p.add("a", (0, 1))
p.add("b", (0, 1))
p.add("c", (0, 2))


rank = MPI.COMM_WORLD.rank


def lnprob(p, x, y_measured):
    # slow down lnprob so that we can measure speedup
    print(rank)
    time.sleep(.003)
    y = p.a + p.b * x + p.c * x ** 2
    diff = (y - y_measured) / sigma
    return -np.dot(diff.T, diff) / 2


n_samples = 15
n_walkers_per_param = 210


started = time.time()

samples, lnprobs = sample(
    lnprob,
    p,
    args=gen_data(),
    n_walkers_per_param=n_walkers_per_param,
    n_samples=n_samples,
    show_progress=True,
    show_output=False,
    output_prefix="out",
    parallel=True,
    verbose=False,
    persist=persist_every_n_iterations(1, "sampler.pkl"),
)

needed_parallel = time.time() - started
print("NEEDED", needed_parallel)

print()

started = time.time()

samples, lnprobs = sample(
    lnprob,
    p,
    args=gen_data(),
    n_walkers_per_param=n_walkers_per_param,
    n_samples=n_samples,
    show_progress=True,
    show_output=False,
    output_prefix="out",
    parallel=False,
    verbose=True,
)

needed_serial = time.time() - started

print("speedup: {:.2f}".format(needed_serial / needed_parallel))

$ python examples/sample_line_fit_parallel.py
✗ passed: 00:00:00.7 left: 00:00:00.0 - [∣]
NEEDED 0.7649698257446289

uhammer: perform 1 steps of emcee sampler
✗ passed: 00:00:04.7 left: 00:00:00.0 - [∣]
speedup: 6.10

$ ls -l out_run_000*.txt
-rw-r--r--  1 uweschmitt  staff   304 May 22 15:57 out_run_0000_worker_1.txt
-rw-r--r--  1 uweschmitt  staff   312 May 22 15:57 out_run_0000_worker_2.txt
-rw-r--r--  1 uweschmitt  staff   264 May 22 15:57 out_run_0000_worker_3.txt
-rw-r--r--  1 uweschmitt  staff   306 May 22 15:57 out_run_0000_worker_4.txt
-rw-r--r--  1 uweschmitt  staff   296 May 22 15:57 out_run_0000_worker_5.txt
-rw-r--r--  1 uweschmitt  staff   300 May 22 15:57 out_run_0000_worker_6.txt
-rw-r--r--  1 uweschmitt  staff   300 May 22 15:57 out_run_0000_worker_7.txt
-rw-r--r--  1 uweschmitt  staff  2040 May 22 15:57 out_run_0001.txt

Sampling from a distribution¶

from __future__ import print_function

import numpy as np

from uhammer import distribution, gaussian, sample

cov = np.array(((1, .2), (.2, 1)))

lnprob, p = distribution(gaussian(cov), 2, (0, 3))

samples, lnprobs = sample(
    lnprob, p, args=None, n_walkers_per_param=100, n_samples=2000, show_progress=False
)

print(samples[1000:, :].mean(axis=0))

$ python examples/sample_from_distribution.py
uhammer: perform 100 steps of emcee sampler
[1.19346601 1.11656067]

Fitting a model¶

from __future__ import print_function

import numpy as np

from uhammer import Parameters, gaussian, model, sample

a0 = .5
b0 = .5
c0 = 1

sigma = .5

x = np.linspace(-1, 1, 100)
y_measured = a0 + b0 * x + c0 * x ** 2 + sigma * np.random.randn(*x.shape)

p = Parameters()
p.add("a", (0, 1))
p.add("b", (0, 1))
p.add("c", (0, 2))


def line(p, x):
    y = p.a + p.b * x + p.c * x ** 2
    return y


lnprob_via_model = model(line, gaussian(sigma ** 2), x, y_measured)


n_samples = 5000
n_walkers_per_param = 200

samples, lnprob = sample(
    lnprob_via_model,
    p,
    args=[x, y_measured],
    n_walkers_per_param=n_walkers_per_param,
    seed=42,
    n_samples=n_samples,
    show_progress=True,
    show_output=False,
)

# only thinned out samples after burnin:
print(samples[1500::15, :].mean(axis=0))

$ python examples/fit_model.py
uhammer: perform 9 steps of emcee sampler
✗ passed: 00:00:00.2 left: 00:00:00.0 - [∣∣∣∣∣∣∣∣∣]
[0.56275974 0.42042152 1.00929049]