The following example
persists the state of the sampler to a file
continues sampling from the persisted sampler
demonstrates how to write output of the
lnprob
function to stdout or a fileshows how to set a seed
Below we use
persist_final()
, other options arepersist_every_n_iterations()
andpersist_on_error()
.
from __future__ import print_function
import numpy as np
from uhammer import Parameters, continue_sampling, persist_final, sample
sigma = .5
def gen_data():
a0 = .5
b0 = .5
c0 = 1
x = np.linspace(-2, 2, 100)
y_measured = a0 + b0 * x + c0 * x ** 2 + sigma * np.random.randn(*x.shape)
return x, y_measured
p = Parameters()
p.add("a", (0, 1))
p.add("b", (0, 1))
p.add("c", (0, 2))
counter = 0
def lnprob(p, x, y_measured):
global counter
if counter < 5:
print(p)
counter += 1
y = p.a + p.b * x + p.c * x ** 2
diff = (y - y_measured) / sigma
return -np.dot(diff.T, diff) / 2
n_walkers_per_param = 20
# we save the final state of the sampler to 'sampler.pkl' and write the output
# when evaluationg the 'lnprob' function to stdout. We further set a seed:
samples, lnprobs = sample(
lnprob,
p,
args=gen_data(),
n_walkers_per_param=n_walkers_per_param,
n_samples=5000,
show_progress=False,
show_output=True,
persist=persist_final("sampler.pkl"),
seed=42,
)
print()
print("continue sampling")
counter = 0
# we continue sampling from the persisted sampler and write output to 'output.txt':
samples, lnprobs = continue_sampling(
"sampler.pkl",
n_samples=3000,
show_progress=True,
show_output=False,
output_prefix="output",
)
$ python examples/sample_line_fit_extended.py
uhammer: perform 84 steps of emcee sampler
Parameters(a=3.745401e-01, b=9.507143e-01, c=1.463988e+00)
Parameters(a=5.986585e-01, b=1.560186e-01, c=3.119890e-01)
Parameters(a=5.808361e-02, b=8.661761e-01, c=1.202230e+00)
Parameters(a=7.080726e-01, b=2.058449e-02, c=1.939820e+00)
Parameters(a=8.324426e-01, b=2.123391e-01, c=3.636499e-01)
uhammer: persisted sampler after iteration 84
continue sampling
uhammer: perform 50 steps of emcee sampler
✗ passed: 00:00:00.6 left: 00:00:00.0 - [∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣∣]
$ ls -l output_run_0000.txt
-rw-r--r-- 1 uweschmitt staff 295 May 22 15:48 output_run_0000.txt
$ head -2 output_run_0000.txt
Parameters(a=0.4591726382942836, b=0.5968783255152834, c=0.9867551523205441)
Parameters(a=0.34492701778793583, b=0.5968000537205522, c=1.0672690581906268)
The sampler function returns computed samples as a numpy
array of shape
(n_samples, len(p)
arranged as follows:
samples |
---|
p-dimensional sample from walker 0 |
p-dimensional sample from walker 1 |
… |
p-dimensional sample from walker n - 1 |
p-dimensional sample from walker 0 |
p-dimensional sample from walker 1 |
… |
Setting the argument parallel=True enables parallel mode. uhammer detects if your script runs with mpirun or not. Without MPI uhammer spawns workers on all available cores.
On euler this means you can either start your script using MPI:
$ bsub -n 200 mpirun python examples/sample_line_fit_parallel.py
or using cores on one compute node:
$ bsub -n 32 -R fullnode python examples/sample_line_fit_parallel.py
In case you allocate more cores than available, or if number of walkers is not a multiple of number of workers, uhammer will show you some warnings.
from __future__ import print_function
import time
import numpy as np
from mpi4py import MPI
from uhammer import Parameters, persist_every_n_iterations, sample
sigma = .5
def gen_data():
a0 = .5
b0 = .5
c0 = 1
x = np.linspace(-2, 2, 100)
y_measured = a0 + b0 * x + c0 * x ** 2 + sigma * np.random.randn(*x.shape)
return x, y_measured
p = Parameters()
p.add("a", (0, 1))
p.add("b", (0, 1))
p.add("c", (0, 2))
rank = MPI.COMM_WORLD.rank
def lnprob(p, x, y_measured):
# slow down lnprob so that we can measure speedup
print(rank)
time.sleep(.003)
y = p.a + p.b * x + p.c * x ** 2
diff = (y - y_measured) / sigma
return -np.dot(diff.T, diff) / 2
n_samples = 15
n_walkers_per_param = 210
started = time.time()
samples, lnprobs = sample(
lnprob,
p,
args=gen_data(),
n_walkers_per_param=n_walkers_per_param,
n_samples=n_samples,
show_progress=True,
show_output=False,
output_prefix="out",
parallel=True,
verbose=False,
persist=persist_every_n_iterations(1, "sampler.pkl"),
)
needed_parallel = time.time() - started
print("NEEDED", needed_parallel)
print()
started = time.time()
samples, lnprobs = sample(
lnprob,
p,
args=gen_data(),
n_walkers_per_param=n_walkers_per_param,
n_samples=n_samples,
show_progress=True,
show_output=False,
output_prefix="out",
parallel=False,
verbose=True,
)
needed_serial = time.time() - started
print("speedup: {:.2f}".format(needed_serial / needed_parallel))
$ python examples/sample_line_fit_parallel.py
✗ passed: 00:00:00.7 left: 00:00:00.0 - [∣]
NEEDED 0.7649698257446289
uhammer: perform 1 steps of emcee sampler
✗ passed: 00:00:04.7 left: 00:00:00.0 - [∣]
speedup: 6.10
$ ls -l out_run_000*.txt
-rw-r--r-- 1 uweschmitt staff 304 May 22 15:57 out_run_0000_worker_1.txt
-rw-r--r-- 1 uweschmitt staff 312 May 22 15:57 out_run_0000_worker_2.txt
-rw-r--r-- 1 uweschmitt staff 264 May 22 15:57 out_run_0000_worker_3.txt
-rw-r--r-- 1 uweschmitt staff 306 May 22 15:57 out_run_0000_worker_4.txt
-rw-r--r-- 1 uweschmitt staff 296 May 22 15:57 out_run_0000_worker_5.txt
-rw-r--r-- 1 uweschmitt staff 300 May 22 15:57 out_run_0000_worker_6.txt
-rw-r--r-- 1 uweschmitt staff 300 May 22 15:57 out_run_0000_worker_7.txt
-rw-r--r-- 1 uweschmitt staff 2040 May 22 15:57 out_run_0001.txt
from __future__ import print_function
import numpy as np
from uhammer import distribution, gaussian, sample
cov = np.array(((1, .2), (.2, 1)))
lnprob, p = distribution(gaussian(cov), 2, (0, 3))
samples, lnprobs = sample(
lnprob, p, args=None, n_walkers_per_param=100, n_samples=2000, show_progress=False
)
print(samples[1000:, :].mean(axis=0))
$ python examples/sample_from_distribution.py
uhammer: perform 100 steps of emcee sampler
[1.19346601 1.11656067]
from __future__ import print_function
import numpy as np
from uhammer import Parameters, gaussian, model, sample
a0 = .5
b0 = .5
c0 = 1
sigma = .5
x = np.linspace(-1, 1, 100)
y_measured = a0 + b0 * x + c0 * x ** 2 + sigma * np.random.randn(*x.shape)
p = Parameters()
p.add("a", (0, 1))
p.add("b", (0, 1))
p.add("c", (0, 2))
def line(p, x):
y = p.a + p.b * x + p.c * x ** 2
return y
lnprob_via_model = model(line, gaussian(sigma ** 2), x, y_measured)
n_samples = 5000
n_walkers_per_param = 200
samples, lnprob = sample(
lnprob_via_model,
p,
args=[x, y_measured],
n_walkers_per_param=n_walkers_per_param,
seed=42,
n_samples=n_samples,
show_progress=True,
show_output=False,
)
# only thinned out samples after burnin:
print(samples[1500::15, :].mean(axis=0))
$ python examples/fit_model.py
uhammer: perform 9 steps of emcee sampler
✗ passed: 00:00:00.2 left: 00:00:00.0 - [∣∣∣∣∣∣∣∣∣]
[0.56275974 0.42042152 1.00929049]