UPDATE: 2022-10-28 20:13:11
変数同士の分散と共分散を行列としてまとめた行列。
library(mvtnorm)
library(tidyverse)
#行列
<- matrix(c(1,1,2,1,1,3,4,4,2,1,
A 3,2,3,2,2,4,1,4,3,2,
1,1,1,2,2,1,2,1,2,2,
1,3,1,1,2,1,3,2,4,1,
4,1,1,1,3,3,4,4,2,4,
5,3,5,5,2,5,4,4,4,1,
26,22,27,34,28,20,18,30,23,32), 10, 7)
このような行列の偏差行列を考える。偏差行列は各列の平均で各列の要素を除算したもの。
#偏差行列
<- sweep(A, 2, apply(A, 2, mean))
AA AA
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] -1 0.4 -0.5 -0.9 1.3 1.2 0
## [2,] -1 -0.6 -0.5 1.1 -1.7 -0.8 -4
## [3,] 0 0.4 -0.5 -0.9 -1.7 1.2 1
## [4,] -1 -0.6 0.5 -0.9 -1.7 1.2 8
## [5,] -1 -0.6 0.5 0.1 0.3 -1.8 2
## [6,] 1 1.4 -0.5 -0.9 0.3 1.2 -6
## [7,] 2 -1.6 0.5 1.1 1.3 0.2 -8
## [8,] 2 1.4 -0.5 0.1 1.3 0.2 4
## [9,] 0 0.4 0.5 2.1 -0.7 0.2 -3
## [10,] -1 -0.6 0.5 -0.9 1.3 -2.8 6
「偏差行列を転置した行列」と「偏差行列」を掛け合わせると、分散をN倍した行列となる。
#n倍のt(偏差行列)*偏差行列
t(AA) %*% AA
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 14 3.0 -1.0 3.0 6.0 5.0 -26
## [2,] 3 8.4 -3.0 -2.4 0.8 5.2 2
## [3,] -1 -3.0 2.5 1.5 0.5 -3.0 5
## [4,] 3 -2.4 1.5 10.9 -1.3 -2.2 -27
## [5,] 6 0.8 0.5 -1.3 16.1 -4.6 -5
## [6,] 5 5.2 -3.0 -2.2 -4.6 17.6 -15
## [7,] -26 2.0 5.0 -27.0 -5.0 -15.0 246
この行列を行数=ベクトルの長さで割れば共分散行列となる。cov()と同じ。
#共分散行列に戻すためにn-1(不偏分散)で割る
<- (t(AA) %*% AA)/(nrow(A)-1)
cov_mat round(cov_mat,2)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1.56 0.33 -0.11 0.33 0.67 0.56 -2.89
## [2,] 0.33 0.93 -0.33 -0.27 0.09 0.58 0.22
## [3,] -0.11 -0.33 0.28 0.17 0.06 -0.33 0.56
## [4,] 0.33 -0.27 0.17 1.21 -0.14 -0.24 -3.00
## [5,] 0.67 0.09 0.06 -0.14 1.79 -0.51 -0.56
## [6,] 0.56 0.58 -0.33 -0.24 -0.51 1.96 -1.67
## [7,] -2.89 0.22 0.56 -3.00 -0.56 -1.67 27.33
round(cov(A),2)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1.56 0.33 -0.11 0.33 0.67 0.56 -2.89
## [2,] 0.33 0.93 -0.33 -0.27 0.09 0.58 0.22
## [3,] -0.11 -0.33 0.28 0.17 0.06 -0.33 0.56
## [4,] 0.33 -0.27 0.17 1.21 -0.14 -0.24 -3.00
## [5,] 0.67 0.09 0.06 -0.14 1.79 -0.51 -0.56
## [6,] 0.56 0.58 -0.33 -0.24 -0.51 1.96 -1.67
## [7,] -2.89 0.22 0.56 -3.00 -0.56 -1.67 27.33
共分散行列から相関行列に変換するために標準偏差の対角行列を用いて変換する。
<- sqrt(diag(1/diag(cov_mat))) %*% cov_mat%*% sqrt(diag(1/diag(cov_mat)))
cor_mat round(cor_mat,2)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1.00 0.28 -0.17 0.24 0.40 0.32 -0.44
## [2,] 0.28 1.00 -0.65 -0.25 0.07 0.43 0.04
## [3,] -0.17 -0.65 1.00 0.29 0.08 -0.45 0.20
## [4,] 0.24 -0.25 0.29 1.00 -0.10 -0.16 -0.52
## [5,] 0.40 0.07 0.08 -0.10 1.00 -0.27 -0.08
## [6,] 0.32 0.43 -0.45 -0.16 -0.27 1.00 -0.23
## [7,] -0.44 0.04 0.20 -0.52 -0.08 -0.23 1.00
round(cor(A),2)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1.00 0.28 -0.17 0.24 0.40 0.32 -0.44
## [2,] 0.28 1.00 -0.65 -0.25 0.07 0.43 0.04
## [3,] -0.17 -0.65 1.00 0.29 0.08 -0.45 0.20
## [4,] 0.24 -0.25 0.29 1.00 -0.10 -0.16 -0.52
## [5,] 0.40 0.07 0.08 -0.10 1.00 -0.27 -0.08
## [6,] 0.32 0.43 -0.45 -0.16 -0.27 1.00 -0.23
## [7,] -0.44 0.04 0.20 -0.52 -0.08 -0.23 1.00
逆のパターン。相関行列から共分散行列。
#相関行列→共分散行列
<- diag(apply(A, 2, sd)) #SDが対角成分にある対角行列
diag_mat <- diag_mat %*% cor_mat %*% diag_mat
cov_mat2 round(cov_mat2,2)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1.56 0.33 -0.11 0.33 0.67 0.56 -2.89
## [2,] 0.33 0.93 -0.33 -0.27 0.09 0.58 0.22
## [3,] -0.11 -0.33 0.28 0.17 0.06 -0.33 0.56
## [4,] 0.33 -0.27 0.17 1.21 -0.14 -0.24 -3.00
## [5,] 0.67 0.09 0.06 -0.14 1.79 -0.51 -0.56
## [6,] 0.56 0.58 -0.33 -0.24 -0.51 1.96 -1.67
## [7,] -2.89 0.22 0.56 -3.00 -0.56 -1.67 27.33
表現行列をベクトルにかけることでベクトルを写す。下記では、ベクトルを45°回転させる。
#45°回転させる表現行列
= matrix(c(cos(pi/4), sin(pi/4), -sin(pi/4), cos(pi/4)),2,2)
A = matrix(c(1,0,0,1),2,2)
B <- A %*% B
res <- B[1,1] ; yend1 <- B[2,1]
xend1 <- B[1,2] ; yend2 <- B[2,2]
xend2 <- res[1,1] ; res21 <- res[2,1]
res11 <- res[1,2] ; res22 <- res[2,2]
res12
ggplot() +
geom_segment(aes(x = 0, y = 0, xend = xend1, yend = yend1),
arrow = arrow(), col = "Gray", size = 1) +
annotate("text", x = xend1, y = yend1+0.1, label = "b1", size = 7) +
geom_segment(aes(x = 0, y = 0, xend = res11, yend = res21),
arrow = arrow(), col = "Black", size = 1) +
annotate("text", x = res11, y = res21+0.1, label = "A*b1", size = 7) +
geom_curve(aes(x = xend1/3, y = yend1/3, xend = res11/3, yend = res21/3),
arrow = arrow(), col = "Gray", size = 1, curvature = .25) +
geom_segment(aes(x = 0, y = 0, xend = xend2, yend = yend2),
arrow = arrow(), col = "Gray", size = 1) +
annotate("text", x = xend2, y = yend2+0.1, label = "b2", size = 7) +
geom_segment(aes(x = 0, y = 0, xend = res12, yend = res22),
arrow = arrow(), col = "Black", size = 1) +
annotate("text", x = res12, y = res22+0.1, label = "A*b2", size = 7) +
geom_curve(aes(x = xend2/3, y = yend2/3, xend = res12/3, yend = res22/3),
arrow = arrow(), col = "Gray", size = 1, curvature = .25) +
coord_fixed(ratio = 1) +
ylim(c(-0.2,1.25)) + xlim(c(-1, 1.2)) +
theme_bw()
つまりこのように回転させている。
<- function(a, b, c, d){
linear_trans <- matrix(c(a, b, c, d), nrow = 2, ncol = 2)
Represent <- c()
data for (i in seq(0,2,0.1)){
<- c()
subdata for(j in seq(0,2,0.1)){
<- c(i, j)
Vec <- Represent %*% Vec
Mapping <- c(Vec, Mapping)
temp <- rbind(subdata, temp)
subdata
}<- rbind(data, subdata)
data
}return(data)
}
<- linear_trans(cos(pi/4), sin(pi/4), -sin(pi/4), cos(pi/4)) #45°回転
df <- data.frame(df) %>% select(X1,X2) %>% mutate(id = "pre")
pre <- data.frame(df) %>% select(X3,X4) %>% rename(X1 = X3, X2 = X4) %>% mutate(id = "post") %>% bind_rows(pre)
df
ggplot(df, aes(x = X1, y = X2, col = id)) +
geom_point() +
coord_fixed(ratio = 1) +
theme_bw()
主成分分析では固有値が出てくる。計算方法によって異なりますが、下記は共分散行列から固有値、固有ベクトルを計算した例。固有値ベクトルは、直交回転する表現行列と同じなので、回転後の図も掲載。固有値の総和は全分散と等しく、固有ベクトルを用いた直交行列の回転には、各次元の分散が順番に大きくなる性質があるので、分散を最も大きくなるような軸を見つけ、点を捉え直すことで、順に分散が大きな軸で成分を表現する。
library(mvtnorm)
library(gridExtra)
<- matrix(c(1,0.8,0.8,1), ncol = 2)
sig <- rmvnorm(n = 5000, mean = c(0,0), sigma = sig)
d <- scale(d)
d
<- cov(d[,1:2])
cov_mat <- eigen(cov_mat)
eig
<- d %>%
c1 as_data_frame() %>%
ggplot(., aes(d[,1], d[,2])) +
geom_point(alpha = 0.1) +
geom_segment(aes(x = 0, y = 0,
xend = eig$vectors[1,1], yend = eig$vectors[2,1]),
arrow = arrow(), col = "#0070B9", size = 1) +
geom_segment(aes(x = 0, y = 0,
xend = eig$vectors[1,2], yend = eig$vectors[2,2]),
arrow = arrow(), col = "#0070B9", size = 1) +
coord_fixed(ratio = 1) +
theme_bw()
#固有値ベクトルで直交回転
<- d %*% eig$vectors
dd
<- dd %>%
c2 as_data_frame() %>%
ggplot(., aes(dd[,1], dd[,2])) +
geom_point(alpha = 0.1) +
geom_segment(aes(x = 0, y = 0, xend = 1, yend = 0),
arrow = arrow(), col = "#0070B9", size = 1) +
geom_segment(aes(x = 0, y = 0, xend = 0, yend = 1),
arrow = arrow(), col = "#0070B9", size = 1) +
ylim(c(-5,5)) + xlim(c(-5,5)) + coord_fixed(ratio = 1) +
theme_bw()
grid.arrange(c1, c2, ncol = 2)
分散が最大になる固有ベクトルを探すイメージ。各ポイントから垂線をおろし、その座標での分散が最大になる軸=固有ベクトル。
set.seed(1234)
<- matrix(c(1,0.8,0.8,1), ncol = 2)
sig <- rmvnorm(n = 100, mean = c(0,0), sigma = sig)
d <- round(scale(d),1)
d
<- c(eigen(cov(d))$vectors[1,1],
eigen_vec eigen(cov(d))$vectors[2,1])
<- as.matrix(d) %*% eigen_vec
conv <- conv * eigen_vec[1] #垂線足のx座標
xx <- conv * eigen_vec[2] #垂線足のy座標
yy <- which.max(d[,2])
id
%>% as.data.frame() %>%
d ggplot(., aes(d[,1], d[,2])) +
geom_abline(slope = eigen_vec[2] / eigen_vec[1], linetype = 2) +
geom_segment(aes(x = xx, y = yy, xend = d[,1], yend = d[,2]), col = "gray") +
geom_segment(aes(x = 0, y = 0, xend = d[id,1], yend = d[id,2]), arrow = arrow(), col = "red") +
geom_segment(aes(x = xx[id], y = yy[id], xend = d[id,1], yend = d[id,2]), arrow = arrow(), col = "red") +
geom_segment(aes(x = 0, y = 0, xend = xx[id], yend = yy[id]), arrow = arrow(), col = "red") +
geom_point(size = 3) +
coord_fixed(ratio = 1) +
theme_bw()