Generalized linear mixed-effects models

Binomial GLMM

The model

$Y_{ij} \sim Bernoulli(p_{ij})$, $i=1, 2, \ldots, n$ clusters, $j=1, 2, \ldots, m$ observations within clusters, $logit(p_{ij}) = \beta_{0} + \beta_{1} X_{ij} + \alpha_{i}$, $\alpha_{i} \sim Normal(0, \sigma^2)$.
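
The likelihood is obtained by integrating the random effect out of each cluster's contribution. As a minimal sketch (an illustration, not part of the original notes), the marginal likelihood contribution of a single cluster can be evaluated by numerical integration:

## marginal likelihood of one cluster: the Bernoulli terms are
## averaged over the Normal(0, sigma_sq) random effect
cluster_lik <- function(y, x, beta, sigma_sq) {
    integrate(function(a) {
        sapply(a, function(ai) {
            p <- plogis(beta[1] + beta[2] * x + ai)  # inverse logit
            prod(dbinom(y, 1, p)) * dnorm(ai, 0, sqrt(sigma_sq))
        })
    }, -Inf, Inf)$value
}
cluster_lik(y = c(1, 0), x = c(0.2, 0.7), beta = c(1.5, -1), sigma_sq = 1)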

The problem

In these Neyman-Scott type problems, there are two types of asymptotics:

  • increase the number of clusters or
  • increase the number of observations in a cluster (keeping the number of clusters constant).

Of course, one can have both the number of clusters and the cluster sizes go to infinity, but that case is mathematically simpler and rather unrealistic.

  • The first type of asymptotics leads to an MLE of $\sigma^2$ that is consistent.
  • The second type of asymptotics does not lead to a consistent MLE of $\sigma^2$. That was precisely the point made by Kiefer and Wolfowitz (1956).

Under the second type of asymptotics, where the number of observations within a cluster increases but the number of clusters does not, the $\alpha_{i}$ behave like fixed effects. The information about $\sigma^2$ does not increase, and hence one cannot obtain a consistent estimator of it.
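
This contrast can be seen in a small simulation. The sketch below (an illustration assuming the lme4 package, with one replicate per setting, so treat the numbers as indicative only) fits the model by maximum likelihood for a growing number of clusters with m fixed, and for growing cluster sizes with n fixed:

library(lme4)
sim_sigma_sq <- function(n, m, beta = c(1.5, -1), sigma_sq = 1) {
    g <- rep(1:n, each = m)
    x <- runif(n * m)
    y <- rbinom(n * m, 1,
        plogis(beta[1] + beta[2] * x + rnorm(n, 0, sqrt(sigma_sq))[g]))
    fit <- glmer(y ~ x + (1 | g), family = binomial)
    VarCorr(fit)$g[1, 1]  # estimated sigma^2
}
set.seed(1)
## first asymptotics: more clusters, estimates tend to stabilize near 1
sapply(c(50, 200, 800), sim_sigma_sq, m = 2)
## second asymptotics: bigger clusters only, no comparable improvement
sapply(c(2, 8, 32), function(m) sim_sigma_sq(n = 25, m = m))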

Data generation

set.seed(150467)
n <- 100
m <- 2
beta <- c(1.5, -1)
sigma_sq <- 1
x <- runif(n * m, min = 0, max = 1)
g <- rep(1:n, times = m)  # cluster index, matching the column-major dim() call below
alpha <- rnorm(n, mean = 0, sd = sqrt(sigma_sq))[g]
mu <- alpha + beta[1] + beta[2] * x
p <- exp(mu) / (1 + exp(mu))  # inverse logit
Y <- rbinom(n * m, 1, p)
dim(Y) <- c(n, m)  # row i holds the m observations of cluster i
summary(Y)
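
Before the Bayesian and data cloning fits, a quick frequentist benchmark can be useful (a sketch assuming the lme4 package, not part of the original workflow); it should roughly recover beta and sigma_sq:

library(lme4)
fit_ml <- glmer(as.vector(Y) ~ x + (1 | g), family = binomial)
fixef(fit_ml)    # compare with beta = c(1.5, -1)
VarCorr(fit_ml)  # compare the SD with sqrt(sigma_sq) = 1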

Cloning clusters

Cloning goes with n: stacking clones of the data set row-wise is equivalent to increasing the number of clusters.

library(dclone)
model <- custommodel("model {
    for (i in 1:n) {
        for (j in 1:m) {
            Y[i,j] ~ dbern(p[i,j])
            logit(p[i,j]) <- alpha[i] + beta[1] + beta[2] * X[i,j]
        }
        alpha[i] ~ dnorm(0, 1 / sigma_sq)
    }
    beta[1] ~ dnorm(0, 0.01)
    beta[2] ~ dnorm(0, 0.01)
    sigma_sq ~ dgamma(0.001, 0.001)
}")
dat <- list(Y = Y, 
    X = matrix(x, n, m), 
    n = n, m = m)
dcfit1 <- dc.fit(data = dat, 
    params = c("beta", "sigma_sq"), 
    model = model, n.iter = 1000,
    n.clones = c(1, 2, 4, 8),
    unchanged = "m", multiply = "n")
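
The point estimates (the MLEs) and their asymptotic standard errors can be extracted with dclone's methods, for example:

coef(dcfit1)     # MLEs: compare with beta and sigma_sq from the simulation
dcsd(dcfit1)     # data cloning standard errors
confint(dcfit1)  # asymptotic (Wald-type) confidence intervals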

An alternative data cloning approach keeps n unchanged and adds the clones along a new dimension of the data array: dcdim marks the dimension (indexed by K) along which dc.fit stacks the clones, and the latent effects alpha[i,k] are replicated with it:

model <- custommodel("model {
  for (k in 1:K) {
    for (i in 1:n) {
      for (j in 1:m) {
        Y[i,j,k] ~ dbern(p[i,j,k])
        logit(p[i,j,k]) <- alpha[i,k] + beta[1] + beta[2] * X[i,j]
      }
      alpha[i,k] ~ dnorm(0, 1 / sigma_sq)
    }
  }
  beta[1] ~ dnorm(0, 0.01)
  beta[2] ~ dnorm(0, 0.01)
  sigma_sq ~ dgamma(0.001, 0.001)
}")
dat <- list(Y = dcdim(array(Y, c(n, m, 1))), 
    X = matrix(x, n, m), 
    n = n, m = m, K = 1)
dcfit2 <- dc.fit(data = dat, 
    params = c("beta", "sigma_sq"), 
    model = model, n.iter = 1000,
    n.clones = c(1, 2, 4, 8),
    unchanged = c("n", "m", "X"), multiply = "K")
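
To see what dcdim does, the marked array can also be cloned directly (a small check; the clones are stacked along the last, K-indexed, dimension):

dim(dclone(dcdim(array(Y, c(n, m, 1))), 2))  # n by m by 2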

Cloning replicates within clusters

Cloning now goes with m, so the data matrices are transposed: the replicates within clusters are multiplied while the number of clusters stays fixed.

model <- custommodel("model {
    for (i in 1:n) {
        for (j in 1:m) {
            Y[j,i] ~ dbern(p[j,i])
            logit(p[j,i]) <- alpha[i] + beta[1] + beta[2] * X[j,i]
        }
        alpha[i] ~ dnorm(0, 1 / sigma_sq)
    }
    beta[1] ~ dnorm(0, 0.01)
    beta[2] ~ dnorm(0, 0.01)
    sigma_sq ~ dgamma(0.001, 0.001)
}")
dat <- list(Y = t(Y), 
    X = t(matrix(x, n, m)), 
    n = n, m = m)
dcfit3 <- dc.fit(data = dat, 
    params = c("beta", "sigma_sq"), 
    model = model, n.iter = 1000,
    n.clones = c(1, 2, 4, 8),
    unchanged = "n", multiply = "m")
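
Transposing is what makes the cloning act on the replicates: by default dclone stacks copies of a matrix row-wise, so cloning the m-by-n matrix t(Y) multiplies m rather than n (a small check):

dim(dclone(t(Y), 2))  # 2*m rows, n columns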

Compare diagnostic results

Cloning the clusters (dcfit1, dcfit2) should show the data cloning diagnostics improving as the number of clones grows, while cloning the replicates within clusters (dcfit3) should not: this mirrors the two types of asymptotics described above.

dcdiag(dcfit1)
dcdiag(dcfit2)
dcdiag(dcfit3)

Poisson GLMM

The model: $(Y_{i} \mid \lambda_i) \sim Poisson(\lambda_{i})$, $i = 1, 2, \ldots, n$, $log(\lambda_i) = \alpha_i + \mathbf{X}^{\top}_{i} \boldsymbol{\beta}$, and $\alpha_i \sim Normal(0, \sigma^2)$.

Data generation

set.seed(1234)
n <- 20
beta <- c(1, -1)
sigma_sq <- 0.5
x <- rnorm(n)
X <- model.matrix(~x)
alpha <- rnorm(n, mean = 0, sd = sqrt(sigma_sq))
lambda <- exp(alpha + X %*% beta)
Y <- rpois(n, lambda = lambda)
table(Y)
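
As before, a frequentist benchmark can be fitted with lme4 (a sketch, not part of the original notes), using an observation-level random intercept:

library(lme4)
id <- factor(1:n)
fit_ml <- glmer(Y ~ x + (1 | id), family = poisson)
fixef(fit_ml)    # compare with beta = c(1, -1)
VarCorr(fit_ml)  # compare the SD with sqrt(0.5)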

Bayesian analysis

model <- custommodel("model {
    for (i in 1:n) {
        Y[i] ~ dpois(lambda[i])
        log(lambda[i]) <- alpha[i] + inprod(X[i,], beta)
        alpha[i] ~ dnorm(0, 1 / sigma_sq)
    }
    for (j in 1:px) {
        beta[j] ~ dnorm(0, 0.001)
    }
    log_sigma ~ dnorm(0, 0.001)
    sigma_sq <- exp(log_sigma)^2
}")
dat <- list(Y = Y, X = X, n = n, px = ncol(X))
fit <- jags.fit(data = dat, 
    params = c("beta", "sigma_sq"), 
    model = model)
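
jags.fit returns an mcmc.list object, so the usual coda tools apply, for example:

summary(fit)      # posterior means, SDs and quantiles
gelman.diag(fit)  # potential scale reduction factors across the chains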

Data cloning

## log_sigma is monitored instead of sigma_sq because the prediction
## step below uses a normal approximation on the (beta, log_sigma) scale
dcfit <- dc.fit(data = dat, 
    params = c("beta", "log_sigma"), 
    model = model, n.iter = 1000,
    n.clones = c(1, 2, 4, 8),
    unchanged = "px", multiply = "n")
dcdiag(dcfit)
coef(dcfit)
exp(coef(dcfit)["log_sigma"])^2  # implied MLE of sigma_sq

Prediction

First, fully Bayesian prediction of the random effects $\alpha_i$; note the informative priors on the parameters (see the exercise at the end of the section):

model <- custommodel("model {
    for (i in 1:n) {
        Y[i] ~ dpois(lambda[i])
        log(lambda[i]) <- alpha[i] + inprod(X[i,], beta)
        alpha[i] ~ dnorm(0, 1 / sigma_sq)
    }
    for (j in 1:px) {
        beta[j] ~ dnorm(0, 0.1)
    }
    log_sigma ~ dnorm(10, 1)
    sigma_sq <- exp(log_sigma)^2
}")
dat <- list(Y = Y, X = X, n = n, px = ncol(X))
pred1 <- jags.fit(data = dat, 
    params = "alpha", 
    model = model)

The data cloning alternative replaces these priors by the asymptotic multivariate normal distribution of the parameter estimates (note that dmnorm in JAGS is parametrized by the precision matrix, hence the solve() in the data below):

model <- custommodel("model {
    for (i in 1:n) {
        Y[i] ~ dpois(lambda[i])
        log(lambda[i]) <- alpha[i] + inprod(X[i,], beta)
        alpha[i] ~ dnorm(0, 1 / sigma_sq)
    }
    param[1:(px + 1)] ~ dmnorm(cf[], V[,])
    for (j in 1:px) {
        beta[j] <- param[j]
    }
    log_sigma <- param[px + 1]
    sigma_sq <- exp(log_sigma)^2
}")
dat <- list(Y = Y, X = X, n = n, px = ncol(X),
    cf = coef(dcfit), V = solve(vcov(dcfit)))
pred2 <- jags.fit(data = dat, 
    params = "alpha", 
    model = model)

## posterior means (est) and 95% intervals for the random effects
alpha_b <- cbind(est = coef(pred1),
    t(quantile(pred1, probs = c(0.025, 0.975))))
alpha_dc <- cbind(est = coef(pred2),
    t(quantile(pred2, probs = c(0.025, 0.975))))

## true (sorted) random effects with Bayesian (red, col = 2) and
## data cloning (blue, col = 4) predictions and 95% intervals
plot(1:n, alpha[order(alpha)], type = "l", 
    ylim = range(alpha, alpha_b, alpha_dc))
points(1:n - 0.2, alpha_b[order(alpha),1], 
    col = 2, pch = 19, cex = 0.5)
segments(x0 = 1:n - 0.2, x1 = 1:n - 0.2,
    y0 = alpha_b[order(alpha),2], 
    y1 = alpha_b[order(alpha),3], col = 2)
points(1:n + 0.2, alpha_dc[order(alpha),1], 
    col=4, pch = 19, cex = 0.5)
segments(x0 = 1:n + 0.2, x1 = 1:n + 0.2,
    y0 = alpha_dc[order(alpha),2], 
    y1 = alpha_dc[order(alpha),3], col = 4)

## interval coverage: sign() is +/- 1 depending on whether the true alpha_i
## lies above or below each limit, so a row sum of 0 means it is inside
table(rowSums(sign(alpha - alpha_b[,-1]))) / n
table(rowSums(sign(alpha - alpha_dc[,-1]))) / n

Use different prior distributions on the parameters and see how they affect the coverage.

What have we learnt?

  • Hierarchical models: Linear mixed models, measurement error.
  • Latent variables versus parameters.
  • Estimation and inference for the parameters.
  • Prediction and inference (coverage) for the latent variables.