APM_4AI09_learn/intervalle.ipynb

126 KiB

<html lang="en"> <head> </head>

Estimateur par intervalle

import numpy as np
import matplotlib.pyplot as plt

Densité $f: \mathbb{R} \to [0, \infty)$ inconnue, échantillon $x_1, \ldots, x_N$ i.i.d. de $f$. On veut estimer $f$ à partir de cet échantillon.

$$ f: x \rightarrow \frac{1}{\sigma \sqrt{2\pi}} e^{-\frac{1}{2} \left( \frac{x-\mu}{\sigma} \right)^2} $$

mu = 1
sigma = 1

def unknown_density(x):
    return 1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-0.5 * ((x - mu) / sigma) ** 2)
def generate_samples(density, n_samples):
    samples = []
    x_min, x_max = -5, 5
    y_max = 0.45 

    while len(samples) < n_samples:
        # 1. Tirer un x candidat uniformément dans l'intervalle
        x_cand = np.random.uniform(x_min, x_max)
        # 2. Tirer une hauteur uniforme u
        u = np.random.uniform(0, y_max)
        
        # 3. Accepter si u est sous la courbe de votre densité
        if u < density(x_cand):
            samples.append(x_cand)
            
    return np.array(samples)
samples = generate_samples(unknown_density, 2000)

x_plot = np.linspace(-5, 5, 1000)
y_plot = unknown_density(x_plot)

plt.figure(figsize=(10, 6))

plt.hist(samples, bins=50, density=True, alpha=0.6, color='skyblue', label='Échantillons iid générés')

plt.plot(x_plot, y_plot, 'r-', lw=2, label='Densité théorique (unknown_density)')

plt.title('Vérification de la génération des $x_i$ iid')
plt.xlabel('$x$')
plt.ylabel('Densité')
plt.legend()
plt.grid(axis='y', alpha=0.3)
plt.show()
No description has been provided for this image
kernel = lambda u: 0.5 * (np.abs(u) <= 1) 

# plot kernel
u_plot = np.linspace(-2, 2, 1000)
plt.figure(figsize=(10, 4))
plt.plot(u_plot, kernel(u_plot), 'b-', lw=2, label='Noyau uniforme')
plt.title('Noyau uniforme') 
Text(0.5, 1.0, 'Noyau uniforme')
No description has been provided for this image

$$ \hat{f}(x) = \frac{1}{N h} \sum_{i=1}^N K\left( \frac{x - x_i}{h} \right) $$

def estimator(kernel, samples, bandwidth = 0.5):
    N = len(samples)
    def estimate(x):
        S = 0
        for x_i in samples:
            S += kernel((x - x_i) / bandwidth)
        return S / (N * bandwidth)
    return estimate
estimate = estimator(unknown_density, samples, 0.2)

x_plot = np.linspace(-5, 5, 1000)
y_density = unknown_density(x_plot)
y_testimate = np.array([estimate(x) for x in x_plot])

print(x_plot.shape, y_testimate.shape)

plt.plot(x_plot, y_density, 'r-', lw=2, label='Densité théorique (unknown_density)')
plt.plot(x_plot, y_testimate, 'g-', lw=2, label='Estimation par noyau (estimator)')
plt.title('Comparaison entre l\'estimation par noyau et la densité théorique')
plt.xlabel('$x$')
plt.ylabel('Densité')
plt.legend()
(1000,) (1000,)
<matplotlib.legend.Legend at 0x7b8f2c6d3470>
No description has been provided for this image
</html>