UPDATE: 2022-08-23 12:44:00
ここでは重回帰分析の偏回帰係数を分解する。偏回帰係数は「他の変数の影響を除いた影響」という表現をよく見るが、これがどのような意味なのか、数値例をまとめておく。予め必要なデータをここで読み込んでおく。
library(palmerpenguins)
library(tidyverse)
library(broom)
<- penguins %>%
d ::filter(species == 'Adelie' & sex == 'male' & year == '2007') %>%
dplyr::drop_na() %>%
tidyr::select(x1 = bill_length_mm,
dplyrx2 = bill_depth_mm,
x3 = flipper_length_mm,
y = body_mass_g)
::tidy(lm(y ~ x1, d)) broom
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 3201. 1364. 2.35 0.0293
## 2 x1 21.0 34.1 0.615 0.545
::tidy(lm(x2 ~ x1, d)) broom
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 14.4 4.46 3.24 0.00414
## 2 x1 0.127 0.112 1.14 0.269
::tidy(lm(y ~ x1 + x2, d)) broom
## # A tibble: 3 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 1948. 1656. 1.18 0.254
## 2 x1 9.96 34.6 0.288 0.776
## 3 x2 86.8 67.2 1.29 0.212
# x1の偏回帰係数の分解
# 「y~x1のx1の回帰係数」は「下記1,2の和」
# 1. 「y~x1+x2のx1の偏回帰係数」と
# 2. 「(x2~x1のx1の回帰係数)と(y~x1+x2のx2の偏回帰係数)の積」
<- lm(y ~ x1, d)$coef[[2]]
y_to_x1_slope <- lm(y ~ x1 + x2, d)$coef[[2]]
y_to_x1_pslope <- lm(x2 ~ x1, d)$coef[[2]]
x2_to_x1_slope <- lm(y ~ x1 + x2, d)$coef[[3]]
y_to_x2_pslope
all.equal(
y_to_x1_slope,+ (x2_to_x1_slope * y_to_x2_pslope)
y_to_x1_pslope )
## [1] TRUE
# x2の偏回帰係数の分解
# 「y~x2のx2の回帰係数」は「下記1,2の和」
# 1. 「y~x1+x2のx2の偏回帰係数」と
# 2. 「(x1~x2のx2の回帰係数)と(y~x1+x2のx1の偏回帰係数)の積」
<- lm(y ~ x2, d)$coef[[2]]
y_to_x2_slope <- lm(y ~ x1 + x2, d)$coef[[3]]
y_to_x2_pslope <- lm(x1 ~ x2, d)$coef[[2]]
x2_to_x1_slope <- lm(y ~ x1 + x2, d)$coef[[2]]
y_to_x1_pslope all.equal(
y_to_x2_slope, + (x2_to_x1_slope * y_to_x1_pslope)
y_to_x2_pslope )
## [1] TRUE
::tidy(lm( y ~ x1, d)) broom
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 3201. 1364. 2.35 0.0293
## 2 x1 21.0 34.1 0.615 0.545
::tidy(lm(x2 ~ x1, d)) broom
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 14.4 4.46 3.24 0.00414
## 2 x1 0.127 0.112 1.14 0.269
::tidy(lm(x3 ~ x1, d)) broom
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 162. 23.9 6.76 0.00000143
## 2 x1 0.667 0.597 1.12 0.278
::tidy(lm( y ~ x1 + x2 + x3, d)) broom
## # A tibble: 4 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -2548. 2161. -1.18 0.254
## 2 x1 1.97 29.9 0.0658 0.948
## 3 x2 -70.7 81.1 -0.871 0.395
## 4 x3 41.9 15.2 2.77 0.0127
# 「y~x1のx1の回帰係数」は「下記1,2,3の和」
# 1. 「y~x1+x2+x3のx1の偏回帰係数」と
# 2. 「(x2~x1のx1の回帰係数)と(y~x1+x2+x3のx2の偏回帰係数)の積」と
# 3. 「(x3~x1のx1の回帰係数)と(y~x1+x2+x3のx3の偏回帰係数)の積」
<- lm(y ~ x1, d)$coef[[2]]
y_to_x1_slope <- lm(y ~ x1 + x2 + x3, d)$coef[[2]]
y_to_x1_pslope <- lm(x2 ~ x1, d)$coef[[2]]
x2_to_x1_slope <- lm(y ~ x1 + x2 + x3, d)$coef[[3]]
y_to_x2_pslope <- lm(x3 ~ x1, d)$coef[[2]]
x3_to_x1_slope <- lm(y ~ x1 + x2 + x3, d)$coef[[4]]
y_to_x3_pslope
all.equal(
y_to_x1_slope,+ (x2_to_x1_slope * y_to_x2_pslope) + (x3_to_x1_slope * y_to_x3_pslope)
y_to_x1_pslope )
## [1] TRUE