UPDATE: 2023-04-30 15:37:59
ここでは非線形最小二乗法を用いて、非線形な関数のフィッティングについての実装をまとめる。使用しているパッケージはnls
パッケージではなく、gslnls
パッケージ。便利な計算アルゴリズムや漸近信頼区間と予測区間の計算ができる便利なパッケージ。
このノートは、非線形最小二乗法の理論やデータに対する関数についての説明もない。ただただ実装をまとめているだけ。必要なライブラリを読み込んでおく。
library(NISTnls)
library(gslnls)
library(tidyverse)
library(aomisc)
set.seed(1989)
<- seq(0, 50, 1)
x <- ((runif(1, 10, 20)*x)/(runif(1, 0, 10) + x)) + rnorm(n = length(x), 0, 1)
y <- data.frame(y, x)
MichaelisMenten
<- gsl_nls(
MichaelisMenten_nsl fn = y ~ (b1 * x / (b2 + x)),
data = MichaelisMenten,
start = c(b1 = 1, b2 = 1),
control = gsl_nls_control(maxiter = 50),
algorithm = 'lm'
)
# as.matrix(coef(MichaelisMenten_nsl))
cbind(
MichaelisMenten, predict(MichaelisMenten_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit), linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: MichaelisMenten')
<- data.frame(
Polynomial x = seq(5, 50, 5),
y = c(12.6, 74.1, 157.6, 225.5, 303.4, 462.8, 669.9, 805.3, 964.2, 1169)
)
<- gsl_nls(
Polynomial_nsl fn = y ~ a + b*x + c*x^2,
data = Polynomial,
start = c(a = 1, b = 1, c = 1),
control = gsl_nls_control(maxiter = 50),
algorithm = 'lm'
)
# as.matrix(coef(Polynomial_nsl))
cbind(
Polynomial, predict(Polynomial_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: Polynomial')
<- structure(list(
ExponentialDecay x = c(0L, 10L, 20L, 30L, 40L, 50L, 60L, 70L, 0L, 10L, 20L, 30L, 40L, 50L, 60L, 70L, 0L, 10L, 20L, 30L, 40L, 50L, 60L, 70L),
y = c(96.4, 46.3, 21.2, 17.89, 10.1, 6.9, 3.5, 1.9, 102.3, 49.2, 26.31, 14.22, 5.4, 3.4, 0.5, 0.2, 101.33, 54.89, 28.12, 13.33, 6.11, 0.35, 2.1, 0.922)),
class = "data.frame", row.names = c(NA, -24L)
)
<- gsl_nls(
ExponentialDecay_nsl fn = y ~ a*exp(k*x),
data = ExponentialDecay,
start = c(a = 100, k = 0.01),
control = gsl_nls_control(maxiter = 100),
algorithm = 'lm'
)
# as.matrix(coef(ExponentialDecay_nsl))
cbind(
ExponentialDecay, predict(ExponentialDecay_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: ExponentialDecay')
<- data.frame(
Asymptotic x = c(1, 3, 5, 7, 9, 11, 13, 20),
y = c(8.22, 14.0, 17.2, 16.9, 19.2, 19.6, 19.4, 19.6)
)
<- gsl_nls(
Asymptotic_nsl fn = y ~ a-(a-b)*exp(-c*x),
data = Asymptotic,
start = c(a = 1, b = 1, c = 0.01),
control = gsl_nls_control(maxiter = 1000),
algorithm = 'lm'
)
# as.matrix(coef(Asymptotic_nsl))
cbind(
Asymptotic, predict(Asymptotic_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: Asymptotic')
<- structure(list(
Power x = c(1L, 2L, 4L, 8L, 16L, 32L, 64L, 128L, 256L),
y = c(4, 5, 7, 8, 10, 14, 19, 22, 26)),
row.names = c(NA, -9L), class = "data.frame")
<- gsl_nls(
Power_nsl fn = y ~ a*x^b,
data = Power,
start = c(a = 0.1, b = 0.01),
control = gsl_nls_control(maxiter = 1000),
algorithm = 'lm'
)
# as.matrix(coef(Power_nsl))
cbind(
Power, predict(Power_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: Asymptotic')
<- data.frame(
Logarithmic x = c(1,2,4,5,7,12),
y = c(1.97, 2.32, 2.67, 2.71, 2.86, 3.09)
)
<- gsl_nls(
Logarithmic_nsl fn = y ~ a + b * log(x),
data = Logarithmic,
start = c(a = 0.1, b = 0.1),
control = gsl_nls_control(maxiter = 1000),
algorithm = 'lm'
)
# as.matrix(coef(Logarithmic_nsl))
cbind(
Logarithmic, predict(Logarithmic_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: Logarithmic')
<- data.frame(
Rectangular x = c(0, 5, 7, 22, 28, 39, 46, 200),
y = c(12.74, 13.66, 14.11, 14.43, 14.78, 14.86, 14.78, 14.91)
)
<- gsl_nls(
Rectangular_nsl fn = y ~ a*x/(b+x),
data = Rectangular,
start = c(a = 10, b = 0.5),
control = gsl_nls_control(maxiter = 1000),
algorithm = 'lm'
)
# as.matrix(coef(Rectangular_nsl))
cbind(
Rectangular, predict(Rectangular_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: Rectangular')
<- structure(list(
Weibull x = c(0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 2L, 2L,2L, 2L, 5L, 5L, 5L, 5L, 7L, 7L, 7L, 7L, 10L, 10L, 10L, 10L, 20L, 20L, 20L, 20L, 50L, 50L, 50L, 50L),
y = c(4.74, 4.57, 4.51, 4.5, 4.12, 3.84, 3.13, 2.92, 3.75, 3.18, 2.6, 2.04, 2.46, 1.8, 1.03, 1.02, 1.01, 0.95, 0.86, 0.71, 0.92, 0.85, 0.81, 0.67, 0.65,0.61, 0.6, 0.48, 0.55, 0.44, 0.4, 0.39)),
class = "data.frame", row.names = c(NA, -32L))
<- gsl_nls(
Weibull1_nsl fn = y ~ c + (d-c)*exp(-exp(-b*(log(x)-log(e)))),
data = Weibull,
start = c(b = 1, c = 0.5, d = 5, e = 3),
control = gsl_nls_control(maxiter = 1000),
algorithm = 'lm'
)
# as.matrix(coef(Weibull1_nsl))
cbind(
Weibull, predict(Weibull1_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: Weibull type1')
<- gsl_nls(
Weibull2_nsl fn = y ~ c + (d-c)*(1-exp(-exp(b*(log(x)-log(e))))),
data = Weibull,
start = c(b = 1, c = 0.5, d = 5, e = 3),
control = gsl_nls_control(maxiter = 1000),
algorithm = 'lm'
)
# as.matrix(coef(Weibull2_nsl))
cbind(
Weibull, predict(Weibull2_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: Weibull type2')
<- data.frame(
Bragg x = c(5, 10, 15, 20, 25, 30, 35, 40, 45, 50),
y = c(0.1, 2, 5.7, 9.3, 19.7, 28.4, 20.3, 6.6, 1.3, 0.1)
)
<- gsl_nls(
Bragg_nsl fn = y ~ d * exp(-b*(x-e)^2),
data = Bragg,
start = c(b = 0.1, d = 30, e = 30),
control = gsl_nls_control(maxiter = 1000),
algorithm = 'lm'
)
# as.matrix(coef(Bragg_nsl))
cbind(
Bragg, predict(Bragg_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: Bragg')
<- Bragg
Lorentz
<- gsl_nls(
Lorentz_nsl fn = y ~ d / (1 + b*(x-e)^2),
data = Lorentz,
start = c(b = 0.1, d = 30, e = 30),
control = gsl_nls_control(maxiter = 1000),
algorithm = 'lm'
)
# as.matrix(coef(Lorentz_nsl))
cbind(
Lorentz, predict(Lorentz_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: Lorentz')
Betaのデータはgsl_nls()
関数では収束させることができなかったため、aomisc
パッケージの関数を利用している。このパッケージの関数は特定の関数系に対して、初期値を自動的に計算できるため、非線形回帰分析を線形回帰分析と同じくらいスムーズに実行できる。
下記に使用方法がまとまっており、ここでもその利用方法に従っている。
<- data.frame(
Beta x = c(1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50),
y = c(0, 0, 0, 7.7, 12.3, 19.7, 22.4, 20.3, 6.6, 0, 0)
)#
# Beta_nsl <- gsl_nls(
# fn = y ~ d * (((x-xb)/(xo-xb))*((xc-x)/(xc-xo))^((xc-xo)/(xo-xb)))^b,
# data = Beta,
# start = c(b = 1.291, d = 22.294, xb = 9.438, xc = 40.464, xo = 31.154),
# control = gsl_nls_control(maxiter = 1000),
# algorithm = 'lm'
# )
<- nls(y ~ NLS.beta(x, b, d, Xb, Xo, Xc), data = Beta)
model # summary(model)
cbind(
Beta, fit = predict(model, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: Beta')
下記はNIST StRD Nonlinear Regressionでアーカイブされているデータを扱っている。レベルはHigherのみ。
<- gsl_nls(
Ratkowsky2_nsl fn = y ~ b1 / (1 + exp(b2 - b3 * x)),
data = Ratkowsky2,
start = c(b1 = 100, b2 = 1, b3 = 0.1),
control = gsl_nls_control(maxiter = 100)
)
#as.matrix(coef(Ratkowsky2_nsl))
cbind(
Ratkowsky2, predict(Ratkowsky2_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: Ratkowsky2')
<- gsl_nls(
MGH09_nsl fn = y ~ (b1*(x^2 + b2*x)) / (x^2 + b3*x + b4),
data = MGH09,
start = c(b1 = 25, b2 = 39, b3 = 41.5, b4 = 39),
control = gsl_nls_control(maxiter = 100)
)
# as.matrix(coef(MGH09_nsl))
cbind(
MGH09, predict(MGH09_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: MGH09')
<- gsl_nls(
Thurber_nsl fn = y ~ (b1 + b2*x + b3*x^2 + b4*x^3)/(1 + b5*x + b6*x^2 + b7*x^3),
data = Thurber,
start = c(b1 = 1000, b2 = 1000, b3 = 400, b4 = 40, b5 = 0.7, b6 = 0.3, b7 = 0.03),
control = gsl_nls_control(maxiter = 100)
)# as.matrix(coef(Thurber_nsl))
cbind(
Thurber, predict(Thurber_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: Thurber')
<- gsl_nls(
MGH10_nsl fn = y ~ b1 * exp(b2 / (x + b3)),
data = MGH10,
start = c(b1 = 2, b2 = 400000, b3 = 25000),
control = gsl_nls_control(maxiter = 100000)
)
# as.matrix(coef(MGH10_nsl))
cbind(
MGH10, predict(MGH10_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: MGH10')
<- gsl_nls(
Eckerle4_nsl fn = y ~ (b1/b2) * exp((-1/2)*((x-b3)/b2)^2),
data = Eckerle4,
start = c(b1 = 1, b2 = 10, b3 = 500),
control = gsl_nls_control(maxiter = 100)
)
# as.matrix(coef(Eckerle4_nsl))
cbind(
Eckerle4, predict(Eckerle4_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: Eckerle4')
<- gsl_nls(
Ratkowsky3_nsl fn = y ~ b1 / (1 + exp(b2-b3*x))^(1/b4),
data = Ratkowsky3,
start = c(b1 = 100, b2 = 10, b3 = 1, b4 = 1),
control = gsl_nls_control(maxiter = 100)
)
# as.matrix(coef(Ratkowsky3_nsl))
cbind(
Ratkowsky3, predict(Ratkowsky3_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: Ratkowsky3')
<- gsl_nls(
Bennett5_nsl fn = y ~ b1 * (b2 + x)^(-1/b3),
data = Bennett5,
start = c(b1 = -2000, b2 = 50, b3 = 0.8),
control = gsl_nls_control(maxiter = 100)
)
# as.matrix(coef(Bennett5_nsl))
cbind(
Bennett5, predict(Bennett5_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: Bennett5')
<- tribble(
BoxBOD ~y,~x,
109, 1,
149, 2,
149, 3,
191, 5,
213, 7,
224, 10
)
<- gsl_nls(
BoxBOD_nsl fn = y ~ b1 * (1 - exp(-1*b2 * x)),
data = BoxBOD,
start = c(b1 = 1, b2 = 1),
control = gsl_nls_control(maxiter = 100),
algorithm = 'lmaccel', # デフォルトのlm(levenberg-Marquardt)では収束しない
)
# as.matrix(coef(BoxBOD_nsl))
cbind(
BoxBOD, predict(BoxBOD_nsl, interval = "confidence", level = 0.95)
%>%
) ggplot(., aes(x, y))+
geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2) +
geom_line(aes(x, fit),linetype = 'dashed') +
geom_point(col = 'red') +
ggtitle('Dataset: BoxBOD')