Rで回帰分析:一般化線形混合モデル(誤差構造:二項分布)

Rでデータサイエンス

一般化線形混合モデル(誤差構造:二項分布)

library(lme4)
library(dplyr)
c("lme4", "dplyr") %>%
    sapply(packageVersion)
$lme4
[1]  1  1 35  1

$dplyr
[1] 1 1 4

サンプルデータの作成

# サンプルデータの作成
set.seed(20230409)
# オッズ
## コントロール群
p_c <- 0.25
odds_c <- p_c/(1 - p_c)
## トリートメント群
p_t <- 0.85
odds_t <- p_t/(1 - p_t)
# オッズ比
or <- p_t/p_c
list(p_c = p_c, p_t = p_t, or = or)
$p_c
[1] 0.25

$p_t
[1] 0.85

$or
[1] 3.4

固定効果

n_id <- 10
n_plot <- 2
# 固定効果
b0 <- log(p_c)
b1 <- log(or)
list(b0 = b0, b1 = b1)
$b0
[1] -1.386294

$b1
[1] 1.223775

ランダム効果

# ランダム効果
effect_r_var <- 0.5
(effect_r <- rep(rnorm(n = n_id, mean = 0, sd = sqrt(effect_r_var)), each = n_plot))
 [1] -0.29359624 -0.29359624  0.05418604  0.05418604  0.05927669  0.05927669
 [7]  0.26410186  0.26410186 -0.74161404 -0.74161404  0.02671040  0.02671040
[13]  0.11138860  0.11138860 -0.72434538 -0.72434538  0.28579766  0.28579766
[19] -0.04971714 -0.04971714
(id <- rep(LETTERS[seq(n_id)], each = n_plot))
 [1] "A" "A" "B" "B" "C" "C" "D" "D" "E" "E" "F" "F" "G" "G" "H" "H" "I" "I" "J"
[20] "J"
(plot <- paste(id, rep(seq(n_plot), times = n_id), sep = "."))
 [1] "A.1" "A.2" "B.1" "B.2" "C.1" "C.2" "D.1" "D.2" "E.1" "E.2" "F.1" "F.2"
[13] "G.1" "G.2" "H.1" "H.2" "I.1" "I.2" "J.1" "J.2"
(group <- rep(c("treatment", "control"), times = n_id))
 [1] "treatment" "control"   "treatment" "control"   "treatment" "control"  
 [7] "treatment" "control"   "treatment" "control"   "treatment" "control"  
[13] "treatment" "control"   "treatment" "control"   "treatment" "control"  
[19] "treatment" "control"  
(df <- data.frame(id, plot, group))
   id plot     group
1   A  A.1 treatment
2   A  A.2   control
3   B  B.1 treatment
4   B  B.2   control
5   C  C.1 treatment
6   C  C.2   control
7   D  D.1 treatment
8   D  D.2   control
9   E  E.1 treatment
10  E  E.2   control
11  F  F.1 treatment
12  F  F.2   control
13  G  G.1 treatment
14  G  G.2   control
15  H  H.1 treatment
16  H  H.2   control
17  I  I.1 treatment
18  I  I.2   control
19  J  J.1 treatment
20  J  J.2   control
(log_odds = with(df, b0 + b1 * (group == "treatment") + effect_r))
 [1] -0.45611517 -1.67989060 -0.10833289 -1.33210833 -0.10324224 -1.32701767
 [7]  0.10158293 -1.12219250 -0.90413297 -2.12790840 -0.13580853 -1.35958396
[13] -0.05113033 -1.27490576 -0.88686430 -2.11063974  0.12327873 -1.10049670
[19] -0.21223607 -1.43601150
(prop <- plogis(log_odds))
 [1] 0.3879078 0.1571100 0.4729432 0.2088108 0.4742123 0.2096531 0.5253739
 [8] 0.2456048 0.2882019 0.1064137 0.4661000 0.2043079 0.4872202 0.2184186
[15] 0.2917573 0.1080670 0.5307807 0.2496468 0.4471393 0.1921638

サンプルサイズの設定

# サンプルサイズの設定
df$n_sample <- sample(90:100, size = n_id * n_plot, replace = T)
# 目的変数の設定
df$y <- rbinom(n = n_id * n_plot, size = df$n_sample, prob = prop)
df
   id plot     group n_sample  y
1   A  A.1 treatment       90 38
2   A  A.2   control       93 18
3   B  B.1 treatment      100 41
4   B  B.2   control      100 15
5   C  C.1 treatment       98 46
6   C  C.2   control      100 15
7   D  D.1 treatment       96 46
8   D  D.2   control       90 28
9   E  E.1 treatment       95 26
10  E  E.2   control      100 11
11  F  F.1 treatment       93 45
12  F  F.2   control       95 17
13  G  G.1 treatment       94 45
14  G  G.2   control       96 23
15  H  H.1 treatment       99 39
16  H  H.2   control       92  7
17  I  I.1 treatment       95 45
18  I  I.2   control       90 23
19  J  J.1 treatment       96 39
20  J  J.2   control       90 18

一般化線形混合モデル

  • 誤差構造:二項分布、リンク関数:ロジット
# 一般化線形混合モデル(誤差構造:二項分布、リンク関数:ロジット)
mod <- glmer(cbind(y, n_sample - y) ~ group + (1 | id), data = df, family = binomial(link = "logit"))
mod %>%
    summary()
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: cbind(y, n_sample - y) ~ group + (1 | id)
   Data: df

     AIC      BIC   logLik deviance df.resid 
   132.6    135.6    -63.3    126.6       17 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-1.94622 -0.47488  0.00054  0.56390  1.77992 

Random effects:
 Groups Name        Variance Std.Dev.
 id     (Intercept) 0.06173  0.2485  
Number of obs: 20, groups:  id, 10

Fixed effects:
               Estimate Std. Error z value            Pr(>|z|)    
(Intercept)     -1.4979     0.1153  -12.99 <0.0000000000000002 ***
grouptreatment   1.2083     0.1069   11.30 <0.0000000000000002 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr)
grouptrtmnt -0.577

固定効果(再掲)

# 固定効果(再掲)
list(b0 = b0, b1 = b1)
$b0
[1] -1.386294

$b1
[1] 1.223775

95%信頼区間

# 95%信頼区間
confint(mod, oldNames = F, level = 0.95)
                       2.5 %     97.5 %
sd_(Intercept)|id  0.1167963  0.4737878
(Intercept)       -1.7406043 -1.2645465
grouptreatment     1.0001973  1.4197445

参考引用資料

  1. https://aosmith.rbind.io/2020/08/20/simulate-binomial-glmm/
  2. https://stackoverflow.com/questions/55319539/glmer-output-from-r-meaning-of-sig01-sig02-sig03

最終更新

Sys.time()
[1] "2024-03-23 09:47:01 JST"

R、Quarto、Package

R.Version()$version.string
[1] "R version 4.3.3 (2024-02-29 ucrt)"
quarto::quarto_version()
[1] '1.4.542'
packageVersion(pkg = "tidyverse")
[1] '2.0.0'

著者