%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from PIL import Image
EigenFace
PCA
SVD
This notebook dives into the eigenvalues and eigenvectors of the covariance matrix. We also compare this method with SVD to understand what happens under the hood.
Import packages
Load data
# Load the training images and their labels with np.loadtxt.
X_train = np.loadtxt('../../../data/faces_train.txt')
y_train = np.loadtxt('../../../data/faces_train_labels.txt')
X_train.shape, y_train.shape
((280, 1024), (280,))
# Load the test images and their labels with np.loadtxt.
X_test = np.loadtxt('../../../data/faces_test.txt')
y_test = np.loadtxt('../../../data/faces_test_labels.txt')
X_test.shape, y_test.shape
((120, 1024), (120,))
Understand the data
# Take the first training row to inspect the shape of a single flattened image.
sample = X_train[0]
sample.shape
(1024,)
# Training rows divided by 40 (presumably the number of persons in the dataset).
X_train.shape[0] / 40
7.0
# Test rows divided by 40 (presumably the number of persons in the dataset).
X_test.shape[0] / 40
3.0
Define a function to convert the data to image array and display the face images.
def show_images(arr, num_person, num_faces, total_persons=40):
    """Display a grid of face images, one grid row per person.

    Every selected row of ``arr`` is reshaped to 32x32, transposed and drawn
    in grayscale with the axes hidden.

    Args:
        arr: 2-D array whose rows are flattened 32x32 images. Rows are assumed
            to be grouped by person, with the same number of consecutive rows
            for each person.
        num_person: Number of persons to show (grid rows).
        num_faces: Number of faces to show per person (grid columns).
        total_persons: Number of persons contained in ``arr``; used to work
            out how many consecutive rows belong to one person.
    """
    faces_per_person = arr.shape[0] // total_persons
    # squeeze=False keeps ``axs`` two-dimensional even for a 1x1 grid.
    fig, axs = plt.subplots(
        num_person, num_faces, figsize=(num_faces, num_person), squeeze=False
    )
    for i in range(num_person):
        for j in range(num_faces):
            ax = axs[i, j]
            # Pass the colormap explicitly: plt.set_cmap() called after drawing
            # only recolours the current image, not every image in the grid.
            ax.imshow(arr[i * faces_per_person + j].reshape(32, 32).T, cmap='gray')
            ax.axis('off')
    # Kept so the session-wide default colormap still becomes gray, as before.
    plt.set_cmap('gray')
    plt.tight_layout()
    plt.show()
# Show the first 5 faces of the first 3 persons in the training set.
show_images(X_train, 3, 5)
Show mean face
def show_image(data):
    """Display a single flattened 32x32 image in grayscale.

    Args:
        data: Array with 1024 elements; it is reshaped to 32x32 and
            transposed before being drawn.
    """
    fig, ax = plt.subplots(figsize=(1, 1))
    # Colormap is passed explicitly so the result does not depend on the
    # session default.
    ax.imshow(data.reshape(32, 32).T, cmap='gray')
    ax.axis('off')
    # Kept so the session-wide default colormap still becomes gray, as before.
    plt.set_cmap('gray')
    plt.tight_layout()
    plt.show()
# Average over all training rows to get the mean face.
mean = X_train.mean(0)  # shape (1024,)
show_image(mean)
Perform PCA from covariance matrix
Compute eigenvalues and eigenvectors of covariance matrix
def pca(data):
    """Eigen-decompose the scatter matrix of the mean-centered data.

    The matrix decomposed is ``Z.T @ Z`` where ``Z`` is ``data`` with the
    column means removed. It is not divided by the number of samples, so the
    eigenvalues equal the squared singular values of ``Z``.

    Args:
        data: 2-D array of shape (n_samples, n_features).

    Returns:
        Tuple ``(eigenvals, eigenvecs)``: the eigenvalues sorted from largest
        to smallest, and the matching eigenvectors as columns in that order.
    """
    mean = data.mean(0)  # per-feature mean, shape (n_features,)
    Z = data - mean
    S = Z.T @ Z
    # eigh is for symmetric matrices and returns eigenvalues in ascending order.
    eigenvals, eigenvecs = np.linalg.eigh(S)
    # Indices that reorder the eigenvalues from largest to smallest.
    reversed_idx = np.argsort(-eigenvals)
    eigenvals = eigenvals[reversed_idx]
    eigenvecs = eigenvecs[:, reversed_idx]
    return eigenvals, eigenvecs
%%time
= pca(X_train) eigen_vals, eigen_vecs
CPU times: user 1.15 s, sys: 63.9 ms, total: 1.21 s
Wall time: 174 ms
# Display the shapes of the eigenvalue vector and the eigenvector matrix.
eigen_vals.shape, eigen_vecs.shape
((1024,), (1024, 1024))
# Display the eigenvalues; the trailing ones come out as tiny negative numbers.
eigen_vals
array([ 7.59100215e+07, 4.19397005e+07, 2.54955933e+07, ...,
-5.02190332e-09, -7.09703055e-09, -7.63365465e-09])
# Display the first 5 rows of the eigenvector matrix.
eigen_vecs[:5]
array([[-0.01287756, -0.05501646, 0.0041873 , ..., 0. ,
0. , 0. ],
[-0.01462181, -0.06097046, -0.00761386, ..., 0.04503731,
0.06237077, 0.21112058],
[-0.01704114, -0.06917858, -0.01997429, ..., 0.08273158,
0.36417853, 0.14920292],
[-0.02048617, -0.07692255, -0.02277752, ..., -0.04538269,
-0.0119913 , -0.37879662],
[-0.02213695, -0.07991882, -0.02080318, ..., 0.02432163,
0.13519464, 0.02273907]])
Show top 5 eigenfaces
def show_pc_images(data):
    """Display each column of ``data`` as a 32x32 grayscale image in one row.

    Args:
        data: 2-D array of shape (1024, n); column ``i`` is reshaped to
            32x32, transposed, drawn, and titled ``pc{i}``.
    """
    ncols = data.shape[1]
    # squeeze=False keeps ``axs`` two-dimensional even when there is one column.
    fig, axs = plt.subplots(1, ncols, figsize=(ncols, 1.5), squeeze=False)
    for i, ax in enumerate(axs[0]):
        # Pass the colormap explicitly: plt.set_cmap() called after drawing
        # only recolours the current image, not every image in the row.
        ax.imshow(data[:, i].reshape(32, 32).T, cmap='gray')
        ax.axis('off')
        ax.set_title(f'pc{i}')
    # Kept so the session-wide default colormap still becomes gray, as before.
    plt.set_cmap('gray')
    plt.tight_layout()
    plt.show()
# Show the 5 eigenvectors with the largest eigenvalues as images.
show_pc_images(eigen_vecs[:, :5])
Perform PCA with SVD
def svd(data):
    """Compute the singular value decomposition of the mean-centered data.

    Args:
        data: 2-D array of shape (n_samples, n_features).

    Returns:
        Tuple ``(U, s, V)`` where ``U`` holds the left singular vectors,
        ``s`` the singular values, and ``V`` the right singular vectors as
        columns (the transpose of the ``Vt`` returned by ``np.linalg.svd``).
    """
    data_centered = data - data.mean(0)
    U, s, Vt = np.linalg.svd(data_centered)
    # Transpose so the right singular vectors are columns, matching pca().
    return U, s, Vt.T
%%time
= svd(X_train) U, s, V
CPU times: user 837 ms, sys: 26.8 ms, total: 864 ms
Wall time: 124 ms
# Display the shapes of the three SVD factors.
U.shape, s.shape, V.shape
((280, 280), (280,), (1024, 1024))
Check if the top 5 eigenvalues are equal
# Squared singular values next to the eigenvalues from pca(), top 5 of each.
np.square(s[:5]), eigen_vals[:5]
(array([75910021.54729882, 41939700.47489863, 25495593.34635307,
17539063.72985784, 12170662.99105223]),
array([75910021.54729888, 41939700.47489879, 25495593.34635304,
17539063.72985789, 12170662.99105226]))
# Check numerically that the top 5 squared singular values match the eigenvalues.
np.allclose(np.square(s[:5]), eigen_vals[:5])
True
Check if the top 5 eigenvectors are equal
# Top 5 right singular vectors next to the top 5 eigenvectors from pca().
V[:, :5], eigen_vecs[:, :5]
(array([[-0.01287756, 0.05501646, 0.0041873 , 0.00651911, 0.0704008 ],
[-0.01462181, 0.06097046, -0.00761386, 0.00091407, 0.07140231],
[-0.01704114, 0.06917858, -0.01997429, -0.00113944, 0.0715405 ],
...,
[ 0.00308199, -0.04617929, -0.03671734, 0.03130467, 0.07777847],
[ 0.00747202, -0.0494147 , -0.04110058, 0.03836678, 0.07902728],
[ 0.0109414 , -0.05125083, -0.03781413, 0.04223883, 0.07396954]]),
array([[-0.01287756, -0.05501646, 0.0041873 , -0.00651911, 0.0704008 ],
[-0.01462181, -0.06097046, -0.00761386, -0.00091407, 0.07140231],
[-0.01704114, -0.06917858, -0.01997429, 0.00113944, 0.0715405 ],
...,
[ 0.00308199, 0.04617929, -0.03671734, -0.03130467, 0.07777847],
[ 0.00747202, 0.0494147 , -0.04110058, -0.03836678, 0.07902728],
[ 0.0109414 , 0.05125083, -0.03781413, -0.04223883, 0.07396954]]))
# Compare absolute values because some columns differ only by sign between the two methods.
np.allclose(np.abs(V[:, :5]), np.abs(eigen_vecs[:, :5]))
True
Show top 5 eigenfaces
# Show the top 5 right singular vectors as images.
show_pc_images(V[:, :5])
Project the face data of 3 persons down to 2 dimensions and plot it.
# Project the first 7*3 training rows (3 persons, 7 faces each) onto the top 2 right singular vectors.
# NOTE(review): the rows are not mean-centered here, unlike inside svd(); this shifts every
# score by a constant per component but does not change the relative positions of the points.
T = X_train[:7*3, :] @ V[:, :2]
T.shape
(21, 2)
# Labels of the same 7*3 rows, as a column so they can be concatenated with T.
y = y_train[:7*3].reshape(-1, 1)
y.shape
(21, 1)
# Combine the 2-D projection and the labels into one DataFrame for plotting.
data_2d = pd.DataFrame(data=np.concatenate((T, y), axis=1), columns=['pc1', 'pc2', 'person'])
data_2d.head()
| | pc1 | pc2 | person |
---|---|---|---|
0 | -4907.496730 | 458.054067 | 1.0 |
1 | -4344.197376 | 1289.734627 | 1.0 |
2 | -4775.345055 | 577.090674 | 1.0 |
3 | -4492.115417 | -930.775823 | 1.0 |
4 | -4639.398032 | 840.018975 | 1.0 |
# Scatter the two projected components, coloured by person.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=data_2d, x='pc1', y='pc2', hue='person', palette=['blue', 'red', 'green'], ax=ax)
plt.show()
The above plot shows that the data is separable.
Explained variance ratio
# Display the number of singular values.
s.shape
(280,)
# The variance explained by each component is proportional to the squared
# singular value (the eigenvalue), so the ratio is s**2 / sum(s**2),
# not s / sum(s).
explained_var = np.square(s)
s_norm = explained_var / explained_var.sum()

plt.plot(np.cumsum(s_norm))
plt.xlabel('singular values')
plt.ylabel('cumulative sum')
plt.xlim(0, s.size)
plt.ylim(0, 1)
plt.show()
Reconstruct face from top principal components
Original face
# Use the first training image as the face to reconstruct.
picked_face = X_train[0]
show_image(picked_face)
Reconstruct_face method 1
k = 20  # number of leading eigenvectors kept for the reconstruction
# Center the face, project it onto the top-k eigenvectors, map it back, and add the mean again.
reconstruct_face1 = (picked_face - mean) @ eigen_vecs[:, :k] @ eigen_vecs[:, :k].T + mean
show_image(reconstruct_face1)
Reconstruct_face method 2
# Put the singular values on the diagonal of a square matrix.
S = np.diag(s)
S.shape
(280, 280)
# Rebuild row 0 of the centered data from the top-k SVD factors, then add the mean back.
reconstruct_face2 = U[0, :k] @ S[:k, :k] @ (V.T)[:k, :] + mean
reconstruct_face2.shape
(1024,)
# Display the face reconstructed from the SVD factors.
show_image(reconstruct_face2)
Confirm the two reconstruct_faces are the same
# Check numerically that the two reconstructions agree.
np.allclose(reconstruct_face1, reconstruct_face2)
True