UPDATE: 2024-02-25 04:51:25.031974
このノートでは、dnorm,rnorm,pnorm,qnorm
についてまとめている。特にpnorm,qnorm
は毎回どっちがどっちかわからなくなるので、可視化を交えながらまとめておく。
接頭辞の意味は下記の通り。
-d*
: パラメタと分布をもとに、確率密度を返す。
-r*
: パラメタと分布をもとに、乱数を返す。
-p*
:
パラメタと分布をもとに、分位数に対する累積確率を返す。p
なので確率を返す。
-q*
:
パラメタと分布をもとに、累積確率に対する分位数を返す。q
なので分位数を返す。
これら4つの関数に関して、標準正規分布をもとに可視化を行う。下記を参考にしている。
library(tidyverse)
library(patchwork)
# Evaluation grid and the parameters of the normal distribution being drawn.
x <- seq(from = -5, to = 5, by = 0.01)
mu <- 0
sd <- 1

# One curve per facet, keyed by the facet label. The `rnorm()` entry evaluates
# dnorm() and the `qnorm()` entry evaluates pnorm(); this appears intentional,
# since these entries supply the background curve drawn in each facet.
normal_dists <- list(
  `dnorm()` = ~ dnorm(.x, mu, sd),
  `rnorm()` = ~ dnorm(.x, mu, sd),
  `pnorm()` = ~ pnorm(.x, mu, sd),
  `qnorm()` = ~ pnorm(.x, mu, sd)
)
# Evaluate every curve in `normal_dists` on the grid `x`, then reshape to long
# form: one row per (x, func) pair with the curve height stored in `prob`.
df <- tibble(x, mu, sd) %>%
  # across() replaces the superseded mutate_at(); `.names = '{.fn}'` names each
  # new column after its list entry ('dnorm()', 'rnorm()', ...), which is what
  # mutate_at() produced for a single input column.
  mutate(across(.cols = x, .fns = normal_dists, .names = '{.fn}')) %>%
  pivot_longer(cols = -c(x, mu, sd), names_to = 'func', values_to = 'prob') %>%
  # Tag each curve with its scale so the density and cumulative panels can be
  # separated later.
  mutate(distribution = ifelse(
    func %in% c('pnorm()', 'qnorm()'),
    'Cumulative probability',
    'Probability density'
  ))
# Curves on the density scale. The spelling of the renamed column is kept
# as-is because the plotting code refers to the column by this exact name.
df_pdf <- rename(
  filter(df, distribution == 'Probability density'),
  `Probabilitiy density` = prob
)
# Curves on the cumulative-probability scale, with the value column renamed to
# match that scale.
df_cdf <- rename(
  filter(df, distribution == 'Cumulative probability'),
  `Cumulative probability` = prob
)
# Arrow segments for the `dnorm()` facet. Each chosen x value yields two
# segments: line_1 rises from the x-axis to the density curve, and line_2 runs
# from that point horizontally to the left edge of the grid.
df_dnorm <- tibble(
  x_start.line_1 = c(-1.96, 0, 1),
  pd_start.line_1 = 0,
  x_end.line_1 = x_start.line_1,
  pd_end.line_1 = dnorm(x_end.line_1, mu, sd),
  x_start.line_2 = x_end.line_1,
  pd_start.line_2 = pd_end.line_1,
  x_end.line_2 = min(x),
  pd_end.line_2 = pd_start.line_2,
  id = seq_along(x_start.line_1)
) %>%
  # Columns are named '<coordinate>.<line>': go long, split the name on the
  # dot, then widen again so that every row describes one segment.
  pivot_longer(cols = -id, names_to = 'name', values_to = 'value') %>%
  separate(col = name, into = c('source', 'line'), sep = '\\.') %>%
  pivot_wider(
    id_cols = c(id, line),
    names_from = source,
    values_from = value
  ) %>%
  mutate(
    func = 'dnorm()',
    # Read by the plotting code as the arrow-head length: 0 for the first leg,
    # 0.03 for the second.
    size = ifelse(line == 'line_1', 0, 0.03)
  )
# Arrow segments for the `rnorm()` facet: three random draws, each drawn as a
# segment from the density curve down to the x-axis. The seed is fixed so the
# draws are reproducible.
set.seed(1)
df_rnorm <- tibble(
  x_start = rnorm(3, mu, sd),
  pd_start = dnorm(x_start, mu, sd),
  x_end = x_start,
  pd_end = 0,
  func = 'rnorm()'
)
# Arrow segments for the `pnorm()` facet. Each quantile yields two segments:
# line_1 rises from the x-axis to the cumulative curve, and line_2 runs from
# that point horizontally to the left edge of the grid.
df_pnorm <- tibble(
  x_start.line_1 = c(-1.96, 0, 1.96),
  pd_start.line_1 = 0,
  x_end.line_1 = x_start.line_1,
  pd_end.line_1 = pnorm(x_end.line_1, mu, sd),
  x_start.line_2 = x_end.line_1,
  pd_start.line_2 = pd_end.line_1,
  x_end.line_2 = min(x),
  pd_end.line_2 = pd_start.line_2,
  id = seq_along(x_start.line_1)
) %>%
  # Columns are named '<coordinate>.<line>': go long, split the name on the
  # dot, then widen again so that every row describes one segment.
  pivot_longer(cols = -id, names_to = 'name', values_to = 'value') %>%
  separate(col = name, into = c('source', 'line'), sep = '\\.') %>%
  pivot_wider(
    id_cols = c(id, line),
    names_from = source,
    values_from = value
  ) %>%
  mutate(
    func = 'pnorm()',
    # Read by the plotting code as the arrow-head length: 0 for the first leg,
    # 0.03 for the second.
    size = ifelse(line == 'line_1', 0, 0.03)
  )
# Arrow segments for the `qnorm()` facet. Each cumulative probability yields
# two segments: line_1 runs from the left edge of the grid horizontally to the
# cumulative curve, and line_2 drops from that point to the x-axis.
df_qnorm <- tibble(
  x_start.line_1 = min(x),
  pd_start.line_1 = c(0.025, 0.5, 0.975),
  # Pass mu and sd explicitly, as the other segment tables do, so the arrows
  # stay on the curve if the distribution parameters are changed.
  x_end.line_1 = qnorm(pd_start.line_1, mu, sd),
  pd_end.line_1 = pd_start.line_1,
  x_start.line_2 = x_end.line_1,
  pd_start.line_2 = pd_end.line_1,
  x_end.line_2 = x_end.line_1,
  pd_end.line_2 = 0,
  id = seq_along(pd_start.line_1)
) %>%
  # Columns are named '<coordinate>.<line>': go long, split the name on the
  # dot, then widen again so that every row describes one segment.
  pivot_longer(cols = -id, names_to = 'name', values_to = 'value') %>%
  separate(col = name, into = c('source', 'line'), sep = '\\.') %>%
  pivot_wider(
    id_cols = c(id, line),
    names_from = source,
    values_from = value
  ) %>%
  mutate(
    func = 'qnorm()',
    # Read by the plotting code as the arrow-head length: 0 for the first leg,
    # 0.03 for the second.
    size = ifelse(line == 'line_1', 0, 0.03)
  )
# Density panel: the `dnorm()` and `rnorm()` facets, each showing the density
# curve in black with the facet's arrow segments drawn underneath it.
p_pdf <- df_pdf %>%
  ggplot(aes(x, `Probabilitiy density`, color = func)) +
  geom_segment(
    data = df_dnorm,
    aes(x = x_start, y = pd_start, xend = x_end, yend = pd_end),
    # Per-segment head length: 0 hides the head on the first leg.
    arrow = arrow(length = unit(df_dnorm$size, 'npc'), type = 'closed')
  ) +
  geom_segment(
    data = df_rnorm,
    aes(x = x_start, y = pd_start, xend = x_end, yend = pd_end),
    arrow = arrow(length = unit(0.03, 'npc'), type = 'closed')
  ) +
  geom_line(col = 'black') +
  facet_wrap(~ func, nrow = 1) +
  # The column behind the y aesthetic is spelled `Probabilitiy density`; set
  # the axis title explicitly so the misspelling does not reach the figure.
  labs(y = 'Probability density') +
  theme_bw()
# Cumulative panel: the `pnorm()` and `qnorm()` facets, each showing the
# cumulative curve in black with the facet's arrow segments drawn underneath.
p_cdf <- df_cdf %>%
  ggplot(aes(x, `Cumulative probability`, color = func)) +
  geom_segment(
    data = df_pnorm,
    aes(x = x_start, y = pd_start, xend = x_end, yend = pd_end),
    # Head lengths must come from the same table as the segments; the original
    # read them from df_dnorm, which only worked because both tables share the
    # same row layout.
    arrow = arrow(length = unit(df_pnorm$size, 'npc'), type = 'closed')
  ) +
  geom_segment(
    data = df_qnorm,
    aes(x = x_start, y = pd_start, xend = x_end, yend = pd_end),
    arrow = arrow(length = unit(df_qnorm$size, 'npc'), type = 'closed')
  ) +
  geom_line(col = 'black') +
  facet_wrap(~ func, nrow = 1) +
  labs(x = 'x/quantiles') +
  theme_bw()
# Stack the density panel above the cumulative panel in a single column.
wrap_plots(p_pdf, p_cdf, ncol = 1)