Home
Omar Hosney
LinkedIn Profile

Principal Component Analysis (PCA) Cheat Sheet

Introduction to PCA

PCA Algorithm Steps

PCA Applications

Advantages of PCA

Limitations of PCA

PCA Parameters

PCA in Python (sklearn)

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import numpy as np

# Generate sample data: 100 observations, 10 features.
X = np.random.rand(100, 10)

# Standardize the data — PCA is sensitive to feature scale,
# so each column is centered and brought to unit variance first.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply PCA, projecting onto the top 2 principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Fraction of total variance captured by each retained component.
print(pca.explained_variance_ratio_)

# Component loadings: one row per principal component,
# one column per original feature.
print(pca.components_)

Visualizing PCA Results

import matplotlib.pyplot as plt

# Scatter the data in the space of the first two principal components.
plt.figure(figsize=(10, 8))
plt.scatter(X_pca[:, 0], X_pca[:, 1])
plt.xlabel('First Principal Component')
plt.ylabel('Second Principal Component')
plt.title('PCA of Random Data')
plt.show()

# Cumulative explained variance by number of components.
# NOTE(review): this plots the *cumulative* curve; a classic scree
# plot shows per-component variance — title kept as in the original.
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
component_numbers = range(1, len(pca.explained_variance_ratio_) + 1)

plt.figure(figsize=(10, 6))
plt.plot(component_numbers, cumulative_variance)
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.title('Scree Plot')
plt.show()

Choosing Number of Components

# Determine number of components for 95% variance.
# Passing a float in (0, 1) to n_components tells sklearn to keep
# the smallest number of components whose explained variance
# exceeds that fraction.
pca = PCA(n_components=0.95)
X_pca = pca.fit_transform(X_scaled)
print(f"Number of components: {pca.n_components_}")

# Alternatively, use a for loop over a *full* PCA fit.
# (Bug fix: the original looped over the truncated pca above, whose
# explained_variance_ratio_ only contains the retained components and
# sums to >= 0.95 by construction — it is not a standalone alternative.)
pca_full = PCA().fit(X_scaled)
total_variance = 0
for i, variance in enumerate(pca_full.explained_variance_ratio_):
    total_variance += variance
    if total_variance >= 0.95:
        print(f"95% variance reached at {i+1} components")
        break

PCA for Feature Selection

# Get feature importance: total absolute loading of each original
# feature across all retained principal components.
feature_importance = np.abs(pca.components_).sum(axis=0)
feature_names = [f"Feature_{i}" for i in range(X.shape[1])]

# Sort features by importance (argsort ascending, then walk backwards
# so the most important feature prints first).
sorted_idx = np.argsort(feature_importance)
for feat_idx in reversed(sorted_idx):
    print(f"{feature_names[feat_idx]}: {feature_importance[feat_idx]:.4f}")

PCA for Noise Reduction

# Add noise to data: zero-mean Gaussian noise, std 0.1.
X_noisy = X + np.random.normal(0, 0.1, X.shape)

# Apply PCA for denoising: project onto the components explaining 95%
# of the variance, then map back to the original feature space —
# variance discarded by the projection is treated as noise.
pca = PCA(n_components=0.95)
X_denoised = pca.inverse_transform(pca.fit_transform(X_noisy))

# Compare original, noisy, and denoised data side by side
# (first 10 samples, features on the vertical axis).
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))
ax1.imshow(X[:10].T)
ax1.set_title("Original Data")
ax2.imshow(X_noisy[:10].T)
ax2.set_title("Noisy Data")
ax3.imshow(X_denoised[:10].T)
ax3.set_title("Denoised Data")
plt.show()

PCA vs. Other Techniques