Em animation

In [1]:

Copied!





import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
import matplotlib.animation as animation
from matplotlib.animation import PillowWriter
from tqdm import tqdm
import torch
import os
os.chdir('../../')

from tgmm import GaussianMixture

np.random.seed(0)  # for reproducibility
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
import matplotlib.animation as animation
from matplotlib.animation import PillowWriter
from tqdm import tqdm
import torch
import os
os.chdir('../../')

from tgmm import GaussianMixture

np.random.seed(0)  # for reproducibility

Generate Data¶

In [2]:

Copied!





# Generate sample data with real means and stds
real_means = [-2, 2, 6]
real_stds = [2, 1, 3]
cluster_sizes = [2000, 1000, 3000]
data1 = np.random.normal(loc=real_means[0], scale=real_stds[0], size=cluster_sizes[0])
data2 = np.random.normal(loc=real_means[1], scale=real_stds[1], size=cluster_sizes[1])
data3 = np.random.normal(loc=real_means[2], scale=real_stds[2], size=cluster_sizes[2])
data = np.concatenate([data1, data2, data3])

# True mixing coefficients based on the actual data proportions
total_size = sum(cluster_sizes)
true_taus = [size / total_size for size in cluster_sizes]

# Convert data to torch tensor
X_1d = torch.tensor(data, dtype=torch.float32).reshape(-1, 1)

# Initial parameters for the Gaussian Mixture Model
initial_means = torch.tensor([[9.9], [10.0], [10.1]], dtype=torch.float32)
initial_covariances = torch.tensor([[[2.5**2]], [[2.5**2]], [[2.5**2]]], dtype=torch.float32)
initial_weights = torch.tensor([0.33, 0.33, 0.33], dtype=torch.float32)

print(f"Data shape: {data.shape}")
print(f"Real means: {real_means}")
print(f"Real stds: {real_stds}")
print(f"True mixing coefficients: {true_taus}")
# Generate sample data with real means and stds
real_means = [-2, 2, 6]
real_stds = [2, 1, 3]
cluster_sizes = [2000, 1000, 3000]
data1 = np.random.normal(loc=real_means[0], scale=real_stds[0], size=cluster_sizes[0])
data2 = np.random.normal(loc=real_means[1], scale=real_stds[1], size=cluster_sizes[1])
data3 = np.random.normal(loc=real_means[2], scale=real_stds[2], size=cluster_sizes[2])
data = np.concatenate([data1, data2, data3])

# True mixing coefficients based on the actual data proportions
total_size = sum(cluster_sizes)
true_taus = [size / total_size for size in cluster_sizes]

# Convert data to torch tensor
X_1d = torch.tensor(data, dtype=torch.float32).reshape(-1, 1)

# Initial parameters for the Gaussian Mixture Model
initial_means = torch.tensor([[9.9], [10.0], [10.1]], dtype=torch.float32)
initial_covariances = torch.tensor([[[2.5**2]], [[2.5**2]], [[2.5**2]]], dtype=torch.float32)
initial_weights = torch.tensor([0.33, 0.33, 0.33], dtype=torch.float32)

print(f"Data shape: {data.shape}")
print(f"Real means: {real_means}")
print(f"Real stds: {real_stds}")
print(f"True mixing coefficients: {true_taus}")

Data shape: (6000,)
Real means: [-2, 2, 6]
Real stds: [2, 1, 3]
True mixing coefficients: [0.3333333333333333, 0.16666666666666666, 0.5]

Run EM and Track All Parameters¶

In [3]:

Copied!





num_steps = 1000  # Full EM iterations for convergence

# Track parameters over iterations
means_history = np.zeros((num_steps+1, 3))
stds_history = np.zeros((num_steps+1, 3))
taus_history = np.zeros((num_steps+1, 3))
responsibilities_history = []
log_likelihoods = []

means_history[0] = initial_means.squeeze().numpy()
stds_history[0] = [2.5, 2.5, 2.5]
taus_history[0] = initial_weights.numpy()

# Store initial responsibilities
gmm_init = GaussianMixture(
    n_features=1, 
    n_components=3, 
    covariance_type='full',
    init_means=initial_means,
    init_covariances=initial_covariances,
    init_weights=initial_weights,
    device='cpu'
)
gmm_init._allocate_parameters(X_1d, set_random_state=False)
r_init, _ = gmm_init._e_step(X_1d)
responsibilities_history.append(r_init.cpu().numpy())

# Log likelihood function
def log_likelihood(data, means_np, stds_np, taus_np):
    likelihood = np.zeros((data.shape[0], len(means_np)))
    for i in range(len(means_np)):
        likelihood[:, i] = norm.pdf(data, means_np[i], stds_np[i])
    total_likelihood = np.dot(likelihood, taus_np)
    return np.sum(np.log(total_likelihood + 1e-10))  # Add small epsilon for numerical stability

# Calculate initial log-likelihood
log_likelihoods.append(log_likelihood(data, means_history[0], stds_history[0], taus_history[0]))

# Suppress convergence warnings (expected since max_iter=1)
import warnings
warnings.filterwarnings('ignore', message='.*EM did not converge.*')

# Fit one iteration at a time to track progress
actual_steps = 0
for step in tqdm(range(num_steps)):
    # Create a new GMM for this iteration
    gmm_step = GaussianMixture(
        n_features=1, 
        n_components=3, 
        covariance_type='full',
        init_means=torch.tensor(means_history[step].reshape(-1, 1), dtype=torch.float32),
        init_covariances=torch.tensor([[[stds_history[step][i]**2]] for i in range(3)], dtype=torch.float32),
        init_weights=torch.tensor(taus_history[step], dtype=torch.float32),
        device='cpu',
        max_iter=1
    )
    
    gmm_step.fit(X_1d)
    
    # Store parameters
    means_history[step+1] = gmm_step.means_.squeeze().cpu().numpy()
    stds_history[step+1] = torch.sqrt(gmm_step.covariances_[:, 0, 0]).cpu().numpy()
    taus_history[step+1] = gmm_step.weights_.cpu().numpy()
    
    # Store responsibilities
    r, _ = gmm_step._e_step(X_1d)
    responsibilities_history.append(r.cpu().numpy())
    
    # Store log-likelihood
    log_likelihoods.append(log_likelihood(data, means_history[step+1], stds_history[step+1], taus_history[step+1]))
    
    actual_steps = step + 1
    
    if gmm_step.converged_:
        print(f"Converged at iteration {step+1}")
        break

# Trim arrays to actual steps
means_history = means_history[:actual_steps+1]
stds_history = stds_history[:actual_steps+1]
taus_history = taus_history[:actual_steps+1]

print(f"\nTotal iterations: {actual_steps}")
print(f"Final log-likelihood: {log_likelihoods[-1]:.2f}")

# Create frame indices: 0-100 then every 10th iteration
frame_indices = list(range(min(101, len(means_history))))
if len(means_history) > 101:
    frame_indices.extend(range(110, len(means_history), 10))
print(f"Total animation frames: {len(frame_indices)}")
num_steps = 1000  # Full EM iterations for convergence

# Track parameters over iterations
means_history = np.zeros((num_steps+1, 3))
stds_history = np.zeros((num_steps+1, 3))
taus_history = np.zeros((num_steps+1, 3))
responsibilities_history = []
log_likelihoods = []

means_history[0] = initial_means.squeeze().numpy()
stds_history[0] = [2.5, 2.5, 2.5]
taus_history[0] = initial_weights.numpy()

# Store initial responsibilities
gmm_init = GaussianMixture(
    n_features=1, 
    n_components=3, 
    covariance_type='full',
    init_means=initial_means,
    init_covariances=initial_covariances,
    init_weights=initial_weights,
    device='cpu'
)
gmm_init._allocate_parameters(X_1d, set_random_state=False)
r_init, _ = gmm_init._e_step(X_1d)
responsibilities_history.append(r_init.cpu().numpy())

# Log likelihood function
def log_likelihood(data, means_np, stds_np, taus_np):
    likelihood = np.zeros((data.shape[0], len(means_np)))
    for i in range(len(means_np)):
        likelihood[:, i] = norm.pdf(data, means_np[i], stds_np[i])
    total_likelihood = np.dot(likelihood, taus_np)
    return np.sum(np.log(total_likelihood + 1e-10))  # Add small epsilon for numerical stability

# Calculate initial log-likelihood
log_likelihoods.append(log_likelihood(data, means_history[0], stds_history[0], taus_history[0]))

# Suppress convergence warnings (expected since max_iter=1)
import warnings
warnings.filterwarnings('ignore', message='.*EM did not converge.*')

# Fit one iteration at a time to track progress
actual_steps = 0
for step in tqdm(range(num_steps)):
    # Create a new GMM for this iteration
    gmm_step = GaussianMixture(
        n_features=1, 
        n_components=3, 
        covariance_type='full',
        init_means=torch.tensor(means_history[step].reshape(-1, 1), dtype=torch.float32),
        init_covariances=torch.tensor([[[stds_history[step][i]**2]] for i in range(3)], dtype=torch.float32),
        init_weights=torch.tensor(taus_history[step], dtype=torch.float32),
        device='cpu',
        max_iter=1
    )
    
    gmm_step.fit(X_1d)
    
    # Store parameters
    means_history[step+1] = gmm_step.means_.squeeze().cpu().numpy()
    stds_history[step+1] = torch.sqrt(gmm_step.covariances_[:, 0, 0]).cpu().numpy()
    taus_history[step+1] = gmm_step.weights_.cpu().numpy()
    
    # Store responsibilities
    r, _ = gmm_step._e_step(X_1d)
    responsibilities_history.append(r.cpu().numpy())
    
    # Store log-likelihood
    log_likelihoods.append(log_likelihood(data, means_history[step+1], stds_history[step+1], taus_history[step+1]))
    
    actual_steps = step + 1
    
    if gmm_step.converged_:
        print(f"Converged at iteration {step+1}")
        break

# Trim arrays to actual steps
means_history = means_history[:actual_steps+1]
stds_history = stds_history[:actual_steps+1]
taus_history = taus_history[:actual_steps+1]

print(f"\nTotal iterations: {actual_steps}")
print(f"Final log-likelihood: {log_likelihoods[-1]:.2f}")

# Create frame indices: 0-100 then every 10th iteration
frame_indices = list(range(min(101, len(means_history))))
if len(means_history) > 101:
    frame_indices.extend(range(110, len(means_history), 10))
print(f"Total animation frames: {len(frame_indices)}")

100%|██████████| 1000/1000 [00:15<00:00, 64.65it/s]

Total iterations: 1000
Final log-likelihood: -17112.88
Total animation frames: 191

Animation 1: Gaussian Components Evolution¶

In [ ]:

Copied!





# Setup for animation
x = np.linspace(min(data), max(data), 1000)
colors = ['r', 'b', 'g']

fig, ax = plt.subplots(figsize=(12, 6), dpi=100)

# Plot histogram (static)
ax.hist(data, bins=30, density=True, alpha=0.6, color='gray', label='Data histogram')

# Plot real Gaussians (static, dashed)
gaussian_real = np.zeros_like(x)
for real_mean, real_std, true_tau, color in zip(real_means, real_stds, true_taus, colors):
    ax.plot(x, true_tau * norm.pdf(x, real_mean, real_std), color=color, 
            linestyle='--', linewidth=2, alpha=0.7, label=f'True Gaussian {real_means.index(real_mean)+1}')
    gaussian_real += true_tau * norm.pdf(x, real_mean, real_std)

ax.plot(x, gaussian_real, 'k--', linewidth=2.5, alpha=0.7, label='True GMM')

# Initialize animated lines
lines = []
for i, color in enumerate(colors):
    line, = ax.plot([], [], color=color, linewidth=2, label=f'Estimated Gaussian {i+1}')
    lines.append(line)

gmm_line, = ax.plot([], [], 'k-', linewidth=2.5, label='Estimated GMM')

ax.set_xlabel('Value', fontsize=12)
ax.set_ylabel('Density', fontsize=12)
ax.set_title('EM Algorithm: Evolution of Gaussian Components', fontsize=14, fontweight='bold')
ax.legend(loc='upper right', fontsize=9)
ax.set_xlim(min(data), max(data))
ax.set_ylim(0, 0.15)

iteration_text = ax.text(0.02, 0.95, '', transform=ax.transAxes, fontsize=12, 
                         verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

def init():
    for line in lines:
        line.set_data([], [])
    gmm_line.set_data([], [])
    iteration_text.set_text('')
    return lines + [gmm_line, iteration_text]

def animate(frame_idx):
    frame = frame_indices[frame_idx]
    gaussian_est = np.zeros_like(x)
    
    for i in range(3):
        y = taus_history[frame, i] * norm.pdf(x, means_history[frame, i], stds_history[frame, i])
        lines[i].set_data(x, y)
        gaussian_est += y
    
    gmm_line.set_data(x, gaussian_est)
    iteration_text.set_text(f'Iteration: {frame}/{actual_steps}\nLog-likelihood: {log_likelihoods[frame]:.2f}')
    
    return lines + [gmm_line, iteration_text]

anim1 = animation.FuncAnimation(fig, animate, init_func=init, frames=tqdm(range(len(frame_indices)), desc='Animation 1'), 
                                interval=100, blit=True, repeat=True)

# Save animation
writer = PillowWriter(fps=10)
anim1.save('/home/asp/Downloads/HeaDS/tgmm/docs/assets/em_gaussians_evolution.gif', writer=writer)


plt.close()
# Setup for animation
x = np.linspace(min(data), max(data), 1000)
colors = ['r', 'b', 'g']

fig, ax = plt.subplots(figsize=(12, 6), dpi=100)

# Plot histogram (static)
ax.hist(data, bins=30, density=True, alpha=0.6, color='gray', label='Data histogram')

# Plot real Gaussians (static, dashed)
gaussian_real = np.zeros_like(x)
for real_mean, real_std, true_tau, color in zip(real_means, real_stds, true_taus, colors):
    ax.plot(x, true_tau * norm.pdf(x, real_mean, real_std), color=color, 
            linestyle='--', linewidth=2, alpha=0.7, label=f'True Gaussian {real_means.index(real_mean)+1}')
    gaussian_real += true_tau * norm.pdf(x, real_mean, real_std)

ax.plot(x, gaussian_real, 'k--', linewidth=2.5, alpha=0.7, label='True GMM')

# Initialize animated lines
lines = []
for i, color in enumerate(colors):
    line, = ax.plot([], [], color=color, linewidth=2, label=f'Estimated Gaussian {i+1}')
    lines.append(line)

gmm_line, = ax.plot([], [], 'k-', linewidth=2.5, label='Estimated GMM')

ax.set_xlabel('Value', fontsize=12)
ax.set_ylabel('Density', fontsize=12)
ax.set_title('EM Algorithm: Evolution of Gaussian Components', fontsize=14, fontweight='bold')
ax.legend(loc='upper right', fontsize=9)
ax.set_xlim(min(data), max(data))
ax.set_ylim(0, 0.15)

iteration_text = ax.text(0.02, 0.95, '', transform=ax.transAxes, fontsize=12, 
                         verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

def init():
    for line in lines:
        line.set_data([], [])
    gmm_line.set_data([], [])
    iteration_text.set_text('')
    return lines + [gmm_line, iteration_text]

def animate(frame_idx):
    frame = frame_indices[frame_idx]
    gaussian_est = np.zeros_like(x)
    
    for i in range(3):
        y = taus_history[frame, i] * norm.pdf(x, means_history[frame, i], stds_history[frame, i])
        lines[i].set_data(x, y)
        gaussian_est += y
    
    gmm_line.set_data(x, gaussian_est)
    iteration_text.set_text(f'Iteration: {frame}/{actual_steps}\nLog-likelihood: {log_likelihoods[frame]:.2f}')
    
    return lines + [gmm_line, iteration_text]

anim1 = animation.FuncAnimation(fig, animate, init_func=init, frames=tqdm(range(len(frame_indices)), desc='Animation 1'), 
                                interval=100, blit=True, repeat=True)

# Save animation
writer = PillowWriter(fps=10)
anim1.save('/home/asp/Downloads/HeaDS/tgmm/docs/assets/em_gaussians_evolution.gif', writer=writer)


plt.close()

Creating animation 1...

Animation 1:  99%|█████████▉| 190/191 [00:36<00:00,  5.82it/s]

Animation 2: Responsibilities Evolution¶

In [5]:

Copied!





# Sample data points for visualization (all points would be too dense)
n_sample_points = 500
sample_indices = np.linspace(0, len(data)-1, n_sample_points, dtype=int)
data_sample = data[sample_indices]

fig, ax = plt.subplots(figsize=(12, 6), dpi=100)

# Initialize scatter plots
scatters = []
for i, color in enumerate(colors):
    scatter = ax.scatter([], [], c=color, alpha=0.6, s=20, marker='o', label=f'Cluster {i+1}')
    scatters.append(scatter)

ax.set_xlabel('Value', fontsize=12)
ax.set_ylabel('Responsibility', fontsize=12)
ax.set_title('EM Algorithm: Evolution of Responsibilities', fontsize=14, fontweight='bold')
ax.legend(loc='upper right', fontsize=10)
ax.set_xlim(min(data), max(data))
ax.set_ylim(-0.05, 1.05)
ax.grid(True, alpha=0.3)

iteration_text = ax.text(0.02, 0.95, '', transform=ax.transAxes, fontsize=12, 
                         verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

def init():
    for scatter in scatters:
        scatter.set_offsets(np.empty((0, 2)))
    iteration_text.set_text('')
    return scatters + [iteration_text]

def animate(frame_idx):
    frame = frame_indices[frame_idx]
    r = responsibilities_history[frame][sample_indices]
    
    for i in range(3):
        points = np.column_stack([data_sample, r[:, i]])
        scatters[i].set_offsets(points)
    
    iteration_text.set_text(f'Iteration: {frame}/{actual_steps}\nLog-likelihood: {log_likelihoods[frame]:.2f}')
    
    return scatters + [iteration_text]

anim2 = animation.FuncAnimation(fig, animate, init_func=init, frames=tqdm(range(len(frame_indices)), desc='Animation 2'), 
                                interval=100, blit=True, repeat=True)

# Save animation
writer = PillowWriter(fps=10)
anim2.save('/home/asp/Downloads/HeaDS/tgmm/docs/assets/em_responsibilities_evolution.gif', writer=writer)


plt.close()
# Sample data points for visualization (all points would be too dense)
n_sample_points = 500
sample_indices = np.linspace(0, len(data)-1, n_sample_points, dtype=int)
data_sample = data[sample_indices]

fig, ax = plt.subplots(figsize=(12, 6), dpi=100)

# Initialize scatter plots
scatters = []
for i, color in enumerate(colors):
    scatter = ax.scatter([], [], c=color, alpha=0.6, s=20, marker='o', label=f'Cluster {i+1}')
    scatters.append(scatter)

ax.set_xlabel('Value', fontsize=12)
ax.set_ylabel('Responsibility', fontsize=12)
ax.set_title('EM Algorithm: Evolution of Responsibilities', fontsize=14, fontweight='bold')
ax.legend(loc='upper right', fontsize=10)
ax.set_xlim(min(data), max(data))
ax.set_ylim(-0.05, 1.05)
ax.grid(True, alpha=0.3)

iteration_text = ax.text(0.02, 0.95, '', transform=ax.transAxes, fontsize=12, 
                         verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

def init():
    for scatter in scatters:
        scatter.set_offsets(np.empty((0, 2)))
    iteration_text.set_text('')
    return scatters + [iteration_text]

def animate(frame_idx):
    frame = frame_indices[frame_idx]
    r = responsibilities_history[frame][sample_indices]
    
    for i in range(3):
        points = np.column_stack([data_sample, r[:, i]])
        scatters[i].set_offsets(points)
    
    iteration_text.set_text(f'Iteration: {frame}/{actual_steps}\nLog-likelihood: {log_likelihoods[frame]:.2f}')
    
    return scatters + [iteration_text]

anim2 = animation.FuncAnimation(fig, animate, init_func=init, frames=tqdm(range(len(frame_indices)), desc='Animation 2'), 
                                interval=100, blit=True, repeat=True)

# Save animation
writer = PillowWriter(fps=10)
anim2.save('/home/asp/Downloads/HeaDS/tgmm/docs/assets/em_responsibilities_evolution.gif', writer=writer)


plt.close()

Animation 3: Parameter Evolution on Log-Likelihood Surface¶

In [6]:

Copied!





# Compute log-likelihood surfaces for each component
mean_range = np.linspace(min(data), max(data), 50)
std_range = np.linspace(0.01, 5, 50)

log_likelihoods_surfaces = []

print("Computing log-likelihood surfaces...")
for comp_idx in tqdm(range(3), desc='Components'):
    ll_surface = np.zeros((len(std_range), len(mean_range)))
    
    for i, m in enumerate(mean_range):
        for j, s in enumerate(std_range):
            test_means = means_history[-1].copy()
            test_stds = stds_history[-1].copy()
            test_means[comp_idx] = m
            test_stds[comp_idx] = s
            ll_surface[j, i] = log_likelihood(data, test_means, test_stds, taus_history[-1])
    
    log_likelihoods_surfaces.append(ll_surface)

# Create animation with 3 subplots
fig, axs = plt.subplots(1, 3, figsize=(18, 5), dpi=100)
fig.suptitle('EM Algorithm: Parameter Evolution on Log-Likelihood Surface', fontsize=14, fontweight='bold')

# Setup static elements
contours = []
paths = []
current_points = []
start_points = []
real_points = []

for idx, (ax, ll_surface) in enumerate(zip(axs, log_likelihoods_surfaces)):
    # Contour plot
    cs = ax.contourf(mean_range, std_range, ll_surface, levels=20, cmap='viridis', alpha=0.7)
    contours.append(cs)
    cbar = fig.colorbar(cs, ax=ax)
    cbar.ax.set_ylabel('Log-Likelihood', fontsize=9)
    
    # Plot real parameters
    real_pt = ax.scatter(real_means[idx], real_stds[idx], color='red', s=100, 
                        marker='o', edgecolors='white', linewidths=2, 
                        label='True parameters', zorder=5)
    real_points.append(real_pt)
    
    # Plot starting point
    start_pt = ax.scatter(means_history[0, idx], stds_history[0, idx], color='white', 
                         s=150, marker='*', edgecolors='black', linewidths=2, 
                         label='Initial parameters', zorder=5)
    start_points.append(start_pt)
    
    # Initialize path and current point
    path, = ax.plot([], [], 'magenta', linewidth=2, label='EM trajectory', zorder=4)
    paths.append(path)
    
    current_pt = ax.scatter([], [], color='cyan', s=100, marker='o', 
                           edgecolors='black', linewidths=2, 
                           label='Current parameters', zorder=6)
    current_points.append(current_pt)
    
    ax.set_title(f'Component {idx+1}', fontsize=12, fontweight='bold')
    ax.set_xlabel('Mean', fontsize=10)
    ax.set_ylabel('Standard Deviation', fontsize=10)
    ax.legend(loc='upper right', fontsize=8)
    ax.grid(True, alpha=0.3)

iteration_text = fig.text(0.5, 0.02, '', ha='center', fontsize=12, 
                         bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

def init():
    for path in paths:
        path.set_data([], [])
    for pt in current_points:
        pt.set_offsets(np.empty((0, 2)))
    iteration_text.set_text('')
    return paths + current_points + [iteration_text]
def animate(frame_idx):
    frame = frame_indices[frame_idx]
    for idx in range(3):
        # Update trajectory - show all steps up to current frame
        paths[idx].set_data(means_history[:frame+1, idx], stds_history[:frame+1, idx])
        
        # Update current point
        current_points[idx].set_offsets([[means_history[frame, idx], stds_history[frame, idx]]])
    
    iteration_text.set_text(f'Iteration: {frame}/{actual_steps} | Log-likelihood: {log_likelihoods[frame]:.2f}')
    
    return paths + current_points + [iteration_text]

anim3 = animation.FuncAnimation(fig, animate, init_func=init, frames=tqdm(range(len(frame_indices)), desc='Animation 3'), 
                                interval=100, blit=True, repeat=True)

# Save animation
writer = PillowWriter(fps=10)
anim3.save('/home/asp/Downloads/HeaDS/tgmm/docs/assets/em_loglikelihood_evolution.gif', writer=writer)
plt.close()

plt.close()
# Compute log-likelihood surfaces for each component
mean_range = np.linspace(min(data), max(data), 50)
std_range = np.linspace(0.01, 5, 50)

log_likelihoods_surfaces = []

print("Computing log-likelihood surfaces...")
for comp_idx in tqdm(range(3), desc='Components'):
    ll_surface = np.zeros((len(std_range), len(mean_range)))
    
    for i, m in enumerate(mean_range):
        for j, s in enumerate(std_range):
            test_means = means_history[-1].copy()
            test_stds = stds_history[-1].copy()
            test_means[comp_idx] = m
            test_stds[comp_idx] = s
            ll_surface[j, i] = log_likelihood(data, test_means, test_stds, taus_history[-1])
    
    log_likelihoods_surfaces.append(ll_surface)

# Create animation with 3 subplots
fig, axs = plt.subplots(1, 3, figsize=(18, 5), dpi=100)
fig.suptitle('EM Algorithm: Parameter Evolution on Log-Likelihood Surface', fontsize=14, fontweight='bold')

# Setup static elements
contours = []
paths = []
current_points = []
start_points = []
real_points = []

for idx, (ax, ll_surface) in enumerate(zip(axs, log_likelihoods_surfaces)):
    # Contour plot
    cs = ax.contourf(mean_range, std_range, ll_surface, levels=20, cmap='viridis', alpha=0.7)
    contours.append(cs)
    cbar = fig.colorbar(cs, ax=ax)
    cbar.ax.set_ylabel('Log-Likelihood', fontsize=9)
    
    # Plot real parameters
    real_pt = ax.scatter(real_means[idx], real_stds[idx], color='red', s=100, 
                        marker='o', edgecolors='white', linewidths=2, 
                        label='True parameters', zorder=5)
    real_points.append(real_pt)
    
    # Plot starting point
    start_pt = ax.scatter(means_history[0, idx], stds_history[0, idx], color='white', 
                         s=150, marker='*', edgecolors='black', linewidths=2, 
                         label='Initial parameters', zorder=5)
    start_points.append(start_pt)
    
    # Initialize path and current point
    path, = ax.plot([], [], 'magenta', linewidth=2, label='EM trajectory', zorder=4)
    paths.append(path)
    
    current_pt = ax.scatter([], [], color='cyan', s=100, marker='o', 
                           edgecolors='black', linewidths=2, 
                           label='Current parameters', zorder=6)
    current_points.append(current_pt)
    
    ax.set_title(f'Component {idx+1}', fontsize=12, fontweight='bold')
    ax.set_xlabel('Mean', fontsize=10)
    ax.set_ylabel('Standard Deviation', fontsize=10)
    ax.legend(loc='upper right', fontsize=8)
    ax.grid(True, alpha=0.3)

iteration_text = fig.text(0.5, 0.02, '', ha='center', fontsize=12, 
                         bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

def init():
    for path in paths:
        path.set_data([], [])
    for pt in current_points:
        pt.set_offsets(np.empty((0, 2)))
    iteration_text.set_text('')
    return paths + current_points + [iteration_text]
def animate(frame_idx):
    frame = frame_indices[frame_idx]
    for idx in range(3):
        # Update trajectory - show all steps up to current frame
        paths[idx].set_data(means_history[:frame+1, idx], stds_history[:frame+1, idx])
        
        # Update current point
        current_points[idx].set_offsets([[means_history[frame, idx], stds_history[frame, idx]]])
    
    iteration_text.set_text(f'Iteration: {frame}/{actual_steps} | Log-likelihood: {log_likelihoods[frame]:.2f}')
    
    return paths + current_points + [iteration_text]

anim3 = animation.FuncAnimation(fig, animate, init_func=init, frames=tqdm(range(len(frame_indices)), desc='Animation 3'), 
                                interval=100, blit=True, repeat=True)

# Save animation
writer = PillowWriter(fps=10)
anim3.save('/home/asp/Downloads/HeaDS/tgmm/docs/assets/em_loglikelihood_evolution.gif', writer=writer)
plt.close()

plt.close()

Computing log-likelihood surfaces...




Components: 100%|██████████| 3/3 [00:02<00:00,  1.09it/s]