UPDATE: 2021-09-24 22:07:56
<- matrix(c(1,3,1,1,4,5,26,
D 1,2,1,3,1,3,22,
2,3,1,1,1,5,27,
1,2,2,1,1,5,34,
1,2,2,2,3,2,28,
3,4,1,1,3,5,20,
4,1,2,3,4,4,18,
4,4,1,2,4,4,30,
2,3,2,4,2,4,23,
1,2,2,1,4,1,32),
nrow = 10, ncol = 7, byrow = TRUE)
colnames(D) <- paste0("Q", 1:7)
rownames(D) <- paste0("ID", 1:10)
# 1.1のアンケートデータ
D
## Q1 Q2 Q3 Q4 Q5 Q6 Q7
## ID1 1 3 1 1 4 5 26
## ID2 1 2 1 3 1 3 22
## ID3 2 3 1 1 1 5 27
## ID4 1 2 2 1 1 5 34
## ID5 1 2 2 2 3 2 28
## ID6 3 4 1 1 3 5 20
## ID7 4 1 2 3 4 4 18
## ID8 4 4 1 2 4 4 30
## ID9 2 3 2 4 2 4 23
## ID10 1 2 2 1 4 1 32
# 回答者1
1, 1:2] D[
## Q1 Q2
## 1 3
# 回答者2
2, 1:2] D[
## Q1 Q2
## 1 2
# 回答者3
3, 1:2] D[
## Q1 Q2
## 2 3
# 設問1
1:3, 1] D[
## ID1 ID2 ID3
## 1 1 2
# 設問2
1:3, 2] D[
## ID1 ID2 ID3
## 3 2 3
\[ \begin{eqnarray} \boldsymbol{ a } =\left(\begin{array}{c} 3 \\ 4 \end{array}\right), \boldsymbol{ b } =\left(\begin{array}{c} -1 \\ -5 \end{array}\right), \boldsymbol{ c } =\left(\begin{array}{c} -3 \\ 2 \end{array}\right), \boldsymbol{ d } =\left(\begin{array}{c} 4 \\ 3 \end{array}\right), \boldsymbol{ g } =\left(\begin{array}{c} -3 \\ 2 \\ 0 \end{array}\right), \boldsymbol{ h } =\left(\begin{array}{c} 3 \\ 5 \\ -1 \end{array}\right), \boldsymbol{ p } =\left(\begin{array}{c} -3 \\ 2 \\ 0 \end{array}\right) \end{eqnarray} \]
<- c(3,4)
a <- c(-1,-5)
b <- c(-3,2)
c <- c(4,3)
d <- c(-3,2,0)
g <- c(3,5,-1)
h <- c(-3,2,0) p
# 等しい
# 等しくない
<- function(x) sqrt(sum(x^2))
norm # sqrt(26)
norm(b)
## [1] 5.09902
# sqrt(35)
norm(h)
## [1] 5.91608
-5 * d
## [1] -20 -15
library(matlib)
<- c(-10,10)
xlim <- c(-20,10)
ylim plot(xlim, ylim, type = "n", xlab = "X", ylab = "Y", asp = 1)
abline(v = 0, h = 0, col = "gray")
grid()
vectors(rbind(d, -5*d), col = c("red", "blue"))
+ b a
## [1] 2 -1
<- c(-10,10)
xlim <- c(-10,10)
ylim plot(xlim, ylim, type = "n", xlab = "X", ylab = "Y", asp = 1)
abline(v = 0, h = 0, col = "gray")
grid()
vectors(rbind(a, b, "a+b" = a + b), col = c("red", "blue", "darkgreen"))
- b c
## [1] -2 7
<- c(-10,10)
xlim <- c(-10,10)
ylim plot(xlim, ylim, type = "n", xlab = "X", ylab = "Y", asp = 1)
abline(v = 0, h = 0, col = "gray")
grid()
vectors(rbind(c, b, "c-b" = c + (-1)*b), col = c("red", "blue", "darkgreen"))
+ p g
## [1] -6 4 0
<- c(-10,10)
xlim <- c(-10,10)
ylim plot(xlim, ylim, type = "n", xlab = "X", ylab = "Y", asp = 1)
abline(v = 0, h = 0, col = "gray")
grid()
vectors(rbind(g, p, "g+p" = g + p), col = c("red", "blue", "darkgreen"))
# 定義されない
# sqrt(50)
norm(d - c)
## [1] 7.071068
# sqrt(50)
norm(2*d + b)
## [1] 7.071068
<- norm(g)
norm_g <- norm(a)
norm_a # sqrt(13) - sqrt(25)
- norm_a norm_g
## [1] -1.394449
/ norm(a) a
## [1] 0.6 0.8
-3*d - b + 2*a
## [1] -5 4
- 2*g + 5*h p
## [1] 18 23 -5
# 定義されない
\[ \begin{eqnarray} \boldsymbol{ h } =\left(\begin{array}{c} 3 \\ 5 \\ -1 \end{array}\right) \end{eqnarray} \]
<- c(3,5,-1)
h <- c(1,0,0)
e1 <- c(0,1,0)
e2 <- c(0,0,1)
e3
# 3*e1 + 5*e2 + (-1)*e3
1]*e1 + h[2]*e2 + h[3]*e3 h[
## [1] 3 5 -1
\[ \begin{eqnarray} \boldsymbol{ a } =\left(\begin{array}{c} 3 \\ 4 \end{array}\right), \boldsymbol{ b } =\left(\begin{array}{c} -1 \\ -5 \end{array}\right), \boldsymbol{ c } =\left(\begin{array}{c} -3 \\ 2 \end{array}\right), \boldsymbol{ d } =\left(\begin{array}{c} 4 \\ 3 \end{array}\right), \boldsymbol{ g } =\left(\begin{array}{c} -3 \\ 2 \\ 0 \end{array}\right), \boldsymbol{ h } =\left(\begin{array}{c} 3 \\ 5 \\ -1 \end{array}\right), \boldsymbol{ p } =\left(\begin{array}{c} -3 \\ 2 \\ 0 \end{array}\right) \end{eqnarray} \] ### 2.4.1 (c,b)
# c %*% b
t(matrix(c)) %*% matrix(b)
## [,1]
## [1,] -7
# 定義されない
t(matrix(2*p)) %*% matrix(h)
## [,1]
## [1,] 2
# cos = (c・d) / ||c||*||b||
<- norm(c)
norm_c <- norm(b)
norm_b <- as.vector(t(matrix(c)) %*% matrix(b))
innerdot_cb
# -7/(sqrt(13)*sqrt(26))
/ (norm_c * norm_b) innerdot_cb
## [1] -0.3807498
<- c(-5,5)
xlim <- c(-5,5)
ylim plot(xlim, ylim, type = "n", xlab = "X", ylab = "Y", asp = 1)
abline(v = 0, h = 0, col = "gray")
grid()
vectors(rbind(c, b), col = c("red", "blue"))
<- norm(p)
norm_p <- norm(g)
norm_g <- as.vector(t(matrix(p)) %*% matrix(g))
innerdot_pg
# 13/(sqrt(13)*sqrt(13))
/ (norm_p * norm_g) innerdot_pg
## [1] 1
<- c(-5,5)
xlim <- c(-5,5)
ylim plot(xlim, ylim, type = "n", xlab = "X", ylab = "Y", asp = 1)
abline(v = 0, h = 0, col = "gray")
grid()
vectors(rbind(p, g), col = c("red", "blue"))
<- D[6:10, 2]
D2 <- D[6:10, 6]
D6
<- mean(D2)
m2 <- mean(D6)
m6
<- D2 - m2
D2_hensa <- D6 - m6
D6_hensa D2_hensa;D6_hensa
## ID6 ID7 ID8 ID9 ID10
## 1.2 -1.8 1.2 0.2 -0.8
## ID6 ID7 ID8 ID9 ID10
## 1.4 0.4 0.4 0.4 -2.6
# sqrt(6.8)
norm(D2_hensa)
## [1] 2.607681
# sqrt(9.2)
norm(D6_hensa)
## [1] 3.03315
<- as.vector(t(matrix(D2 - m2)) %*% matrix(D6 - m6))
innerdot_hensa26 innerdot_hensa26
## [1] 3.6
変数\(\boldsymbol{x}\)と変数\(\boldsymbol{y}\)の相関係数は下記の通り定義されます。
\[ r_{xy} = \frac{\sum(x_{i} - \bar{x})\sum(y_{i} - \bar{y})} { \sqrt{\sum(x_{i} - \bar{x})^{2} \sum(y_{i} - \bar{y})^{2}} } \]
そして、分母の標準偏差は下記のように書き換えられます。
\[ \sqrt{\sum(x_{i} - \bar{x})^{2}}=\sqrt{n}s_{x}\\ \sqrt{\sum(y_{i} - \bar{y})^{2}}=\sqrt{n}s_{y} \]
標準偏差を使って標準化を行えることを考えると、
\[ \frac{(x_{i} - \bar{x})}{s_{x}} = z_{x} \\ \frac{(y_{i} - \bar{y})}{s_{y}} = z_{y} \]
相関係数は下記のようにも書くことができます。標準化した変数の積和をベクトルの数で割ったものです。
\[ \begin{eqnarray} r_{xy} &=& \frac{\sum(x_{i} - \bar{x}) (y_{i} - \bar{y})} { \sqrt{\sum(x_{i} - \bar{x})^{2} \sum(y_{i} - \bar{y})^{2}} } \\ &=& \sum \left( \frac{(x_{i} - \bar{x})}{ \sqrt{\sum(x_{i} - \bar{x})^2} } \frac{(y_{i} - \bar{y})}{ \sqrt{\sum(y_{i} - \bar{y})^2} } \right) \\ &=& \sum \frac{(x_{i} - \bar{x})}{\sqrt{n}s_{x}} \frac{(y_{i} - \bar{y})}{\sqrt{n}s_{y}} \\ &=& \sum \frac{z_{x}}{\sqrt{n}}\frac{z_{y}}{\sqrt{n}} \\ &=& \sum \frac{z_{x} z_{y}}{n} \end{eqnarray} \]
相関係数の分子は偏差ベクトルの内積であり、偏差ベクトルを\(\boldsymbol{x}_{(m)}\)と\(\boldsymbol{y}_{(m)}\)として表すと、
\[ (\boldsymbol{x}_{(m)}, \boldsymbol{y}_{(m)}) = \sum(x_{i}-\bar{x})(y_{i}-\bar{y}) \]
相関係数の分母は各偏差ベクトルの「長さ」の積である。
\[ ||\boldsymbol{x}_{(m)}|| = \sqrt{\sum(x_{i} - \bar{x})^{2}} = \sqrt{n} s_{x} \\ ||\boldsymbol{y}_{(m)}|| = \sqrt{\sum(y_{i} - \bar{y})^{2}} = \sqrt{n} s_{y} \]
つまり、相関係数は下記のように内積とベクトルの長さを使っても表現できる。
\[ r_{xy} = \frac{(\boldsymbol{x}_{(m)}, \boldsymbol{y}_{(m)})}{||\boldsymbol{x}_{(m)}|| ||\boldsymbol{y}_{(m)}||} \]
cor(D2, D6)
## [1] 0.4551496
/ (norm(D2_hensa) * norm(D6_hensa)) innerdot_hensa26
## [1] 0.4551496
共分散は下記のように定義される。
\[ cov_{xy} = \frac{1}{n} \sum (x_{i} - \bar{x})(y_{i} - \bar{y}) = \frac{(\boldsymbol{x}_{(m)}, \boldsymbol{y}_{(m)})}{n} \]
/ length(D2) innerdot_hensa26
## [1] 0.72
<- D[1:3, 1]
D1
# sqrt(6)
norm(D1)
## [1] 2.44949
<- D[1:3, 2]
D2
# sqrt(22)
norm(D2)
## [1] 4.690416
- D2 D1
## ID1 ID2 ID3
## -2 -1 -1
# sqrt(6)
norm(D1 - D2)
## [1] 2.44949
2, 1:2] - D[3, 1:2] D[
## Q1 Q2
## -1 -1
# sqrt(2)
norm(D[2, 1:2] - D[3, 1:2])
## [1] 1.414214
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] ja_JP.UTF-8/ja_JP.UTF-8/ja_JP.UTF-8/C/ja_JP.UTF-8/ja_JP.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] matlib_0.9.5
##
## loaded via a namespace (and not attached):
## [1] zip_2.1.1 Rcpp_1.0.6 highr_0.8 cellranger_1.1.0
## [5] compiler_4.0.3 pillar_1.6.2 forcats_0.5.1 tools_4.0.3
## [9] digest_0.6.27 jsonlite_1.7.2 evaluate_0.14 lifecycle_1.0.0
## [13] tibble_3.1.3 pkgconfig_2.0.3 rlang_0.4.10 openxlsx_4.2.3
## [17] crosstalk_1.1.1 curl_4.3 yaml_2.2.1 haven_2.3.1
## [21] xfun_0.24 rio_0.5.16 stringr_1.4.0 knitr_1.33
## [25] vctrs_0.3.8 htmlwidgets_1.5.3 systemfonts_1.0.2 hms_1.0.0
## [29] data.table_1.13.6 R6_2.5.0 textshaping_0.3.5 fansi_0.4.2
## [33] readxl_1.3.1 rgl_0.107.10 foreign_0.8-80 rmarkdown_2.6
## [37] carData_3.0-4 car_3.0-10 magrittr_2.0.1 htmltools_0.5.1.1
## [41] ellipsis_0.3.2 MASS_7.3-53 abind_1.4-5 xtable_1.8-4
## [45] ragg_1.1.3 utf8_1.1.4 stringi_1.5.3 crayon_1.4.0