代码之家 › 专栏 › 技术社区 › kilojoules

numpy-有效过滤随机约束下的随机样本

sampling numpy python

kilojoules · 技术社区 · 6 年前

import numpy as np
from scipy.special import gamma
import matplotlib.pyplot as plt
def f(x): return 1. / gamma(3) * x * np.exp(-1 * x)
lambd = .2
c = 1 / lambd / gamma(3) * (2./(1-lambd)) ** 2 * np.exp(-1 * (1 - lambd) * (2. / (lambd - 1)))
def g(x): return c * lambd * np.exp(-1 * lambd * x)
x = np.linspace(0, 50, 1000)
samples = []
N = 100
while len(samples) < N:
    randou = np.random.uniform(0, 1)
    randoh = c * np.random.exponential(0.2)
    if randou <= f(randoh) / g(randoh): samples.append(randoh)
plt.hist(samples, 100, normed=True, label='Simulated PDF')
plt.plot(x, f(x), label='True PDF', lw=2)
plt.xlim(0, 10)
plt.show()

我还尝试一次性生成样本,然后在while循环中过滤这些样本,但我不确定这种方法实际上要快多少:

samples = np.random.uniform(0, 1, 100000)
hsamps = c * np.random.exponential(0.2, 100000)
N = 100
idx = np.array([True, False])
while len(idx[idx==True]) > 0:
    idx = samples > ( f(hsamps) / g(hsamps))
    samples[idx] = np.random.uniform(0, 1, len(idx[idx==True]))
    hsamps[idx] = c * np.random.exponential(0.2, len(idx[idx==True]))

1 回复 | 直到 6 年前

unutbu 6 年前

为了利用NumPy的速度,您需要使用大型数组,而不是在循环中处理单个标量。例如,你可以生成 N

    randous = np.random.uniform(0, 1, size=N)
    randohs = c * np.random.exponential(0.2, size=N)

然后像这样选择那些通过过滤器的:

    mask = randous <= f(randohs) / g(randohs)
    return randohs[mask]

唯一的问题是不能保证 randohs[mask] 具有所需数量的值(或任何值)。所以我们可能要重复这个直到我们产生足够的样本:

while len(samples) < N:
    randohs = generate_samples()
    samples.extend(randohs)
samples = samples[:N]

import numpy as np
from scipy.special import gamma
import matplotlib.pyplot as plt

def f(x): 
    return 1. / gamma(3) * x * np.exp(-1 * x)
def g(x): 
    return c * lambd * np.exp(-1 * lambd * x)

def generate_samples(N=10**5):
    randous = np.random.uniform(0, 1, size=N)
    randohs = c * np.random.exponential(0.2, size=N)
    mask = randous <= f(randohs) / g(randohs)
    return randohs[mask]

lambd = .2
c = (1 / lambd / gamma(3) * (2./(1-lambd)) ** 2 
     * np.exp(-1 * (1 - lambd) * (2. / (lambd - 1))))
x = np.linspace(0, 50, 1000)

samples = []
N = 10**5
while len(samples) < N:
    randohs = generate_samples()
    samples.extend(randohs)
samples = samples[:N]

plt.hist(samples, 100, density=True, label='Simulated PDF')
plt.plot(x, f(x), label='True PDF', lw=2)
plt.xlim(0, 10)
plt.show()