UPDATE: 2022-08-23 12:44:00

はじめに

ここでは重回帰分析の偏回帰係数を分解する。偏回帰係数は「他の変数の影響を除いた影響」という表現をよく見るが、これがどのような意味なのか、数値例をまとめておく。予め必要なデータをここで読み込んでおく。

library(palmerpenguins)
library(tidyverse)
library(broom)

d <- penguins %>% 
  dplyr::filter(species == 'Adelie' & sex == 'male' & year == '2007') %>% 
  tidyr::drop_na() %>% 
  dplyr::select(x1 = bill_length_mm,
         x2 = bill_depth_mm,
         x3 = flipper_length_mm,
         y = body_mass_g)

重回帰分析

2変数の重回帰分析

broom::tidy(lm(y ~ x1, d))
## # A tibble: 2 × 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)   3201.     1364.      2.35   0.0293
## 2 x1              21.0      34.1     0.615  0.545
broom::tidy(lm(x2 ~ x1, d))
## # A tibble: 2 × 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)   14.4       4.46       3.24 0.00414
## 2 x1             0.127     0.112      1.14 0.269
broom::tidy(lm(y ~ x1 + x2, d))
## # A tibble: 3 × 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)  1948.      1656.      1.18    0.254
## 2 x1              9.96      34.6     0.288   0.776
## 3 x2             86.8       67.2     1.29    0.212
# x1の偏回帰係数の分解
# 「y~x1のx1の回帰係数」は「下記1,2の和」
# 1. 「y~x1+x2のx1の偏回帰係数」と
# 2. 「(x2~x1のx1の回帰係数)と(y~x1+x2のx2の偏回帰係数)の積」

y_to_x1_slope  <- lm(y ~ x1, d)$coef[[2]]
y_to_x1_pslope <- lm(y ~ x1 + x2, d)$coef[[2]]
x2_to_x1_slope <- lm(x2 ~ x1, d)$coef[[2]]
y_to_x2_pslope <- lm(y ~ x1 + x2, d)$coef[[3]]

all.equal(
  y_to_x1_slope,
  y_to_x1_pslope + (x2_to_x1_slope * y_to_x2_pslope)
  )
## [1] TRUE
# x2の偏回帰係数の分解
# 「y~x2のx2の回帰係数」は「下記1,2の和」
# 1. 「y~x1+x2のx2の偏回帰係数」と
# 2. 「(x1~x2のx2の回帰係数)と(y~x1+x2のx1の偏回帰係数)の積」
y_to_x2_slope  <- lm(y ~ x2, d)$coef[[2]]
y_to_x2_pslope <- lm(y ~ x1 + x2, d)$coef[[3]]
x2_to_x1_slope <- lm(x1 ~ x2, d)$coef[[2]]
y_to_x1_pslope <- lm(y ~ x1 + x2, d)$coef[[2]]
all.equal(
  y_to_x2_slope, 
  y_to_x2_pslope + (x2_to_x1_slope * y_to_x1_pslope)
  )
## [1] TRUE

3変数の重回帰分析

broom::tidy(lm( y ~ x1, d))
## # A tibble: 2 × 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)   3201.     1364.      2.35   0.0293
## 2 x1              21.0      34.1     0.615  0.545
broom::tidy(lm(x2 ~ x1, d))
## # A tibble: 2 × 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept)   14.4       4.46       3.24 0.00414
## 2 x1             0.127     0.112      1.14 0.269
broom::tidy(lm(x3 ~ x1, d))
## # A tibble: 2 × 5
##   term        estimate std.error statistic    p.value
##   <chr>          <dbl>     <dbl>     <dbl>      <dbl>
## 1 (Intercept)  162.       23.9        6.76 0.00000143
## 2 x1             0.667     0.597      1.12 0.278
broom::tidy(lm( y ~ x1 + x2 + x3, d))
## # A tibble: 4 × 5
##   term        estimate std.error statistic p.value
##   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept) -2548.      2161.    -1.18    0.254 
## 2 x1              1.97      29.9    0.0658  0.948 
## 3 x2            -70.7       81.1   -0.871   0.395 
## 4 x3             41.9       15.2    2.77    0.0127
# 「y~x1のx1の回帰係数」は「下記1,2,3の和」
# 1. 「y~x1+x2+x3のx1の偏回帰係数」と
# 2. 「(x2~x1のx1の回帰係数)と(y~x1+x2+x3のx2の偏回帰係数)の積」と
# 3. 「(x3~x1のx1の回帰係数)と(y~x1+x2+x3のx3の偏回帰係数)の積」

y_to_x1_slope <- lm(y ~ x1, d)$coef[[2]]
y_to_x1_pslope <- lm(y ~ x1 + x2 + x3, d)$coef[[2]]
x2_to_x1_slope <- lm(x2 ~ x1, d)$coef[[2]]
y_to_x2_pslope <- lm(y ~ x1 + x2 + x3, d)$coef[[3]]
x3_to_x1_slope <- lm(x3 ~ x1, d)$coef[[2]]
y_to_x3_pslope <- lm(y ~ x1 + x2 + x3, d)$coef[[4]]

all.equal(
  y_to_x1_slope,
  y_to_x1_pslope + (x2_to_x1_slope * y_to_x2_pslope) + (x3_to_x1_slope * y_to_x3_pslope)
  )
## [1] TRUE